BSF Biomineralization Project: RNAseq Expression and Functional Enrichment Analysis

This script is based off of Langfelder P, Horvath S (2008) WGCNA: an R package for weighted correlation network analysis. BMC Bioinformatics 2008, 9:559 (link to paper)

Set up workspace

Import necessary libraries

library("tidyverse")
library("genefilter")
library("DESeq2")
library("RColorBrewer")
library("WGCNA")
library("flashClust")
library("gridExtra")
library("ComplexHeatmap")
library("goseq")
library("dplyr")
library("clusterProfiler")
library("simplifyEnrichment")

Data input, cleaning, and pre-processing

Import the data files

treatmentinfo <- read.csv("Sample_Info/RNAseq_data.csv", header = TRUE, sep = ",")
head(treatmentinfo)
##   sample_id       time_point treatment extraction_batch
## 1      X119 Unfertilized_egg       AMB                0
## 2      X120 Unfertilized_egg       AMB                0
## 3      X121 Unfertilized_egg       AMB               12
## 4      X127   Fertilized_egg       AMB               10
## 5      X132   Fertilized_egg       AMB               10
## 6      X153         Cleavage       AMB                9
gcount <- as.data.frame(read.csv("1-QC-Align-Assemble/Output/gene_count_matrix.csv", row.names="gene_id"), colClasses = double)
head(gcount)
##                    X1101 X119 X120 X121 X127 X128 X129 X130 X131 X132 X133 X134
## g16981               124    0    0   11    0   22    0    0    0    0    0    0
## g16980                 8    0    0    0    0    0    0    0    0    0    0    0
## g16983                 0    0    0    0    0    0    0    0    0    0    0    0
## adi2mcaRNA10886_R1     0    0    0    0    0    0    0    0    0    0    0    0
## g16985                13    0    0    0    0    0    0    0    0    0    0    0
## g16984                54    0    0    0    0    0    0    0    0    0    0    0
##                    X153 X154 X1548 X155 X156 X157 X158 X159 X160 X162 X1628
## g16981                0    0    47    0    0   22    0   50   53   31   182
## g16980                0    0     0    0    0    0    0    0    0    0     7
## g16983                0    0     0    0    0    0    0    0    0    0     5
## adi2mcaRNA10886_R1    0    0     0    0    0    0    0    0    0    8     0
## g16985                0    0     1    0    0    0    0    0    0    0    13
## g16984                0    0     0    0    0    4    0    0    0    0     0
##                    X163 X164 X165 X166 X167 X168 X169 X179 X180 X181 X182 X183
## g16981               25    0   45   35    0   40   10   15   11    0    0   13
## g16980                0    0    0    0    0    0    0    0    0    0    0    0
## g16983                0    0    0    2    4    4    0    0    0    0    0    0
## adi2mcaRNA10886_R1    0    0    0   15    0    3    0    0    0    0    0    0
## g16985                0    0    0    0    0    0    0    0    0    0    0    0
## g16984                0    0    0    0    0    0    0    0    0    0    0    0
##                    X184 X185 X186 X212 X215 X218 X221 X359 X361 X363 X365 X367
## g16981               26    0    0    0    0   36   11   28   28   33  100   16
## g16980                0    0    0    0    0    4    0    0    0    4    0    4
## g16983                0    0    3    0    0    0    0    0    0    0    0    0
## adi2mcaRNA10886_R1    0    0    0    0    0    0    0    0   14    6    0   14
## g16985                0    0    0    0    4    0    0   16    2    3    0    0
## g16984                0    0    0    1    0    4    3    4   14   19   36    9
##                    X371 X373 X375 X379
## g16981               81   24  180   30
## g16980                0    0    0    3
## g16983                0    0    1    2
## adi2mcaRNA10886_R1    0    0    0    5
## g16985                1    4   10    4
## g16984               21   19   15   11
dim(gcount)
## [1] 63227    51

Quality-filter gene counts

Pre-filtering our dataset to reduce the memory size dataframe, increase the speed of the transformation and testing functions, and improve quality of statistical analysis by removing low-coverage counts. Removed counts could represent outliers in the data and removing these improves sensitivity of statistical tests. Here we will filter out the genes that are only present in fewer than two of the 24 ambient samples.

#keep only ambient treatmentinfo and count data
dev <- c("AMB")
treatmentinfo_dev <- filter(treatmentinfo, treatment %in% dev)
dim(treatmentinfo_dev) #rows should be 24
## [1] 24  4
# delete sample columns corresponding to low and extreme low samples by mapping Null value to them
gcount_dev <- gcount[treatmentinfo_dev$sample_id]
dim(gcount_dev) #columns should be 24
## [1] 63227    24
#create filter for the counts data
#gfiltdev <- rowSums(count(gcount_dev)) > 0
#set filter values for PoverA, P=100% percent of the samples have counts over A=10. This means that only 2 out of 24 (0.083) samples need to have counts over 10. Our smallest sample size for our life stages is two (fertilized egg, mid-gastrula, early-gastrula). By setting 2/24 as the P, this means if a particular gene is expressed only in 1 of these smallest life stages, it will be included in the analysis.
filt <- filterfun(pOverA(0.083,10))

#create filter for the counts data
gfiltdev <- genefilter(gcount_dev, filt)

#identify genes to keep by count filter
gkeepdev <- gcount_dev[gfiltdev,]


#identify genes to keep by count filter
gkeepdev <- gcount_dev[gfiltdev,]

#identify gene lists
gn.keepdev <- rownames(gkeepdev)

#gene count data filtered in PoverA, P percent of the samples have counts over A
gcount_filt_dev <- as.data.frame(gcount_dev[which(rownames(gcount_dev) %in% gn.keepdev),])

#How many rows do we have before and after filtering?
nrow(gcount_dev) #Before
## [1] 63227
nrow(gcount_filt_dev) #After
## [1] 32772

Quality-check of datasets

In order for the DESeq2 algorithms to work, the SampleIDs on the treatmentinfo file and count matrices have to match exactly and in the same order. The following R clump will check to make sure that these match.

#Checking that all row and column names match. Should return "TRUE"
all(rownames(treatmentinfo_dev) %in% colnames(gcount_filt_dev))
## [1] FALSE
all(rownames(treatmentinfo_dev) == colnames(gcount_filt_dev)) 
## [1] FALSE

Read normalization

We are now going normalize our read counts using VST-normalization in DESeq2

Construct the DESeq2 dataset

Merge the treatment and time_point columns into a new column , group. Set group as a factor.

treatmentinfo_dev$time_point <- factor(treatmentinfo_dev$time_point, levels = c("Unfertilized_egg", "Fertilized_egg", "Cleavage", "Prawn_chip", "Early_gastrula", "Mid_gastrula", "Late_gastrula", "Planula", "Adult"))

Create a DESeqDataSet design from gene count matrix and labels. Here we set the design to look at time_point to test for any differences in gene expression across timepoints.

#Set DESeq2 design
gdds_dev <- DESeqDataSetFromMatrix(countData = gcount_filt_dev,
                              colData = treatmentinfo_dev,
                              design = ~time_point)

Log-transform the count data

First we are going to log-transform the data using a variance stabilizing transforamtion (VST). This is only for visualization purposes. Essentially, this is roughly similar to putting the data on the log2 scale. It will deal with the sampling variability of low counts by calculating within-group variability (if blind=FALSE). Importantly, it does not use the design to remove variation in the data, and so can be used to examine if there may be any variability do to technical factors such as extraction batch effects.

To do this we first need to calculate the size factors of our samples. This is a rough estimate of how many reads each sample contains compared to the others. In order to use VST (the faster log2 transforming process) to log-transform our data, the size factors need to be less than 4. Otherwise, there could be artefacts in our results.

SF.gdds_dev <- estimateSizeFactors(gdds_dev) #estimate size factors to determine if we can use vst  to transform our data. Size factors should be less than for to use vst
print(sizeFactors(SF.gdds_dev)) #View size factors
##      X119      X120      X121      X127      X132      X153      X154      X159 
## 1.0519967 0.5686680 0.7558557 0.5351080 0.5251546 1.0029584 1.8794576 0.7811492 
##      X162      X163      X167      X179      X180      X184      X212      X215 
## 1.5226504 1.7499897 1.7848493 1.0151292 0.8308898 0.9339607 1.1350398 0.8053938 
##      X218      X221      X359      X361      X375     X1101     X1548     X1628 
## 1.6504284 1.1560682 1.4281348 1.3679823 1.0904351 1.2882416 0.8315212 0.9180700

Our size factors are all less than 4, so we can use VST!

gvst_dev <- vst(gdds_dev, blind=FALSE) #apply a variance stabilizing transforamtion to minimize effects of small counts and normalize wrt library size
head(assay(gvst_dev), 3) #view transformed gene count data
##            X119     X120     X121     X127     X132     X153     X154     X159
## g16981 8.873991 8.873991 9.127751 8.873991 8.873991 8.873991 8.873991 9.403881
## g16985 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991
## g16984 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991
##            X162     X163     X167     X179     X180     X184     X212     X215
## g16981 9.173980 9.125417 8.873991 9.129687 9.116050 9.224550 8.873991 8.873991
## g16985 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 9.022359
## g16984 8.873991 8.873991 8.873991 8.873991 8.873991 8.873991 8.936504 8.873991
##            X218     X221     X359     X361     X375    X1101    X1548    X1628
## g16981 9.184462 9.079270 9.168398 9.174778 9.717659 9.521989 9.372258 9.796006
## g16985 8.873991 8.873991 9.096707 8.954515 9.075528 9.085384 8.947025 9.124311
## g16984 8.977659 8.981261 8.985432 9.086872 9.120722 9.303627 8.873991 8.873991
Plot a heatmap of sample-to-sample distances
gsampleDists_dev <- dist(t(assay(gvst_dev))) #calculate distance matix
gsampleDistMatrix_dev <- as.matrix(gsampleDists_dev) #distance matrix
rownames(gsampleDistMatrix_dev) <- colnames(gvst_dev) #assign row names
colnames(gsampleDistMatrix_dev) <- NULL #assign col names
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) #assign colors
pheatmap(gsampleDistMatrix_dev, #plot matrix
         clustering_distance_rows=gsampleDists_dev, #cluster rows
         clustering_distance_cols=gsampleDists_dev, #cluster columns
         col=colors) #set colors

##### Principal component plot of samples

gPCAdata_dev <- plotPCA(gvst_dev, intgroup = c("time_point"), ntop= nrow(gvst_dev), returnData=TRUE)
percentVar_dev <- round(100*attr(gPCAdata_dev, "percentVar")) #plot PCA of samples with all data

allgenesfilt_PCA <- ggplot(gPCAdata_dev, aes(PC1, PC2, shape=time_point)) + 
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar_dev[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar_dev[2],"% variance")) +
  scale_shape_manual(values = c("Unfertilized_egg"=3, "Fertilized_egg"=1, "Cleavage"=10, "Prawn_chip"=5, "Early_gastrula"=14, "Mid_gastrula"=11, "Late_gastrula"=6, "Planula"=2, "Adult"=8)) +
  coord_fixed() +
    theme_bw() + #Set background color
  theme(panel.border = element_blank(), # Set border
                     #panel.grid.major = element_blank(), #Set major gridlines 
                     #panel.grid.minor = element_blank(), #Set minor gridlines
                     axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank()); allgenesfilt_PCA # + #Set the plot background

  #theme(legend.position = ("none")) #set title attributes
ggsave("2a-WGCNA/Output/Fig2-DEV-allgenesfilt-PCA.pdf", allgenesfilt_PCA)
## Saving 7 x 5 in image

Compile WGCNA Dataset

Transpose the filtered gene count matrix so that the gene IDs are rows and the sample IDs are columns.

datExpr <- as.data.frame(t(assay(gvst_dev))) #transpose to output to a new data frame with the column names as row names. And make all data numeric

Check for genes and samples with too many missing values with goodSamplesGenes. There shouldn’t be any because we performed pre-filtering

gsg = goodSamplesGenes(datExpr, verbose = 3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
gsg$allOK #Should return TRUE if not, the R chunk below will take care of flagged data
## [1] TRUE

Remove flagged samples if the allOK is FALSE

ncol(datExpr) #number genes before
## [1] 32772
if (!gsg$allOK) #If the allOK is FALSE...
{
# Optionally, print the gene and sample names that are flagged:
if (sum(!gsg$goodGenes)>0)
printFlush(paste("Removing genes:", paste(names(datExpr)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
printFlush(paste("Removing samples:", paste(rownames(datExpr)[!gsg$goodSamples], collapse = ", ")));
# Remove the offending genes and samples from the data:
datExpr = datExpr[gsg$goodSamples, gsg$goodGenes]
}
ncol(datExpr) #number genes after
## [1] 32772

Cluster the samples to look for obvious outliers

Look for outliers by examining

sampleTree = hclust(dist(datExpr), method = "average");
# Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# The user should change the dimensions if the window is too large or too small.
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)

There don’t look to be any outliers, so we will move on with business as usual.

Network construction and consensus module detection

Choosing a soft-thresholding power: Analysis of network topology β

The soft thresholding power (β) is the number to which the co-expression similarity is raised to calculate adjacency. The function pickSoftThreshold performs a network topology analysis. The user chooses a set of candidate powers, however the default parameters are suitable values.

# # Choose a set of soft-thresholding powers
# powers <- c(seq(from = 1, to=19, by=2), c(21:30)) #Create a string of numbers from 1 through 10, and even numbers from 10 through 20
# 
# # Call the network topology analysis function
# sft <-pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)

Plot the results.

# sizeGrWindow(9, 5)
# par(mfrow = c(1,2));
# cex1 = 0.9;
# # # Scale-free topology fit index as a function of the soft-thresholding power
# plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
#      xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
#     main = paste("Scale independence"));
# text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
#     labels=powers,cex=cex1,col="red");
# # # this line corresponds to using an R^2 cut-off
# abline(h=0.8,col="red")
# # # Mean connectivity as a function of the soft-thresholding power
# plot(sft$fitIndices[,1], sft$fitIndices[,5],
#     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
#     main = paste("Mean connectivity"))
# text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

I used a scale-free topology fit index R^2 of 0.85. This lowest recommended R^2 by Langfelder and Horvath is 0.8. I chose 0.85 because we want to use the smallest soft thresholding power that maximizes with model fit. It appears that our soft thresholding power is 23 because it is the loweest power before the R^2=0.8 threshold that maximizes with model fit.

Step-by-step network construction and module detection:

Co-expression adjacency and topological overlap matrix similarity

Adjacency and TOMsimilarity will be executed in supercomputer, Bluewaves, as our dataset is too large for most standard laptops to handle.

Save Rdata necessary for analysis in Bluewaves

# save(datExpr, file = "Data/datExpr.RData")

To be executed on the Bluewaves server
Co-expression similarity and adjacency, using the soft thresholding power 23 and translate the adjacency into topological overlap matrix to calculate the corresponding dissimilarity. I will use a signed network because we have a relatively high softPower, according to (>12): https://peterlangfelder.com/2018/11/25/__trashed/

# #Set up workspace
# getwd() #Display the current working directory
# #If necessary, change the path below to the directory where the data files are stored. "." means current directory. On Windows use a forward slash / instead of the usual \.
# workingDir = ".";
# setwd(WGCNA_dev);
# library(WGCNA) #Load the WGCNA package
# options(stringsAsFactors = FALSE) #The following setting is important, do not omit.
# enableWGCNAThreads() #Allow multi-threading within WGCNA. 
# 
# #Load the data saved in the first part
# adjTOM <- load(file="datExpr.RData")
# adjTOM
# 
# #Run analysis
# softPower=23 #Set softPower to 23
# adjacency=adjacency(datExpr, power=softPower,type="signed") #Calculate adjacency
# TOM= TOMsimilarity(adjacency,TOMType = "signed") #Translate adjacency into topological overlap matrix
# dissTOM= 1-TOM #Calculate dissimilarity in TOM
# save(adjacency, TOM, dissTOM, file = "../adjTOM.RData") #Save to bluewaves dir
# save(dissTOM, file = "../dissTOM.RData") #Save to load into RStudio

Now we can continue using Rstudio

Load in dissTOM file obtained from previous R chunk

# dissTOM_in <- load(file="~/Documents/Putnam_Lab.nosync/dissTOM.RData") #CHANGE TO THE PATH ON YOUR LOCAL COMPUTER
# dissTOM_in

Clustering using TOM

Form distance matrix

# geneTree= flashClust(as.dist(dissTOM), method="average")

We will now plot a dendrogram of genes. Each leaf corresponds to a gene, branches grouping together densely are interconnected, highly co-expressed genes.

# pdf(file="Output/RNAseq/dissTOMClustering.pdf", width=20, height=20)
# plot(geneTree, xlab="", sub="", main= "Gene Clustering on TOM-based dissimilarity", labels= FALSE,hang=0.04)
# dev.off()

Module identification using dynamicTreeCut

Module identification is essentially cutting the branches off the tree in the dendrogram above. We like large modules, so we set the minimum module size relatively high, so we will set the minimum size at 30. I chose 30 as it is the default value chosen by most studies using WGCNA.

# minModuleSize = 30
# dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
# deepSplit = 2, pamRespectsDendro = FALSE,
# minClusterSize = minModuleSize)
# table(dynamicMods) #list modules and respective sizes
# save(dynamicMods, geneTree, file = "Data/dyMod_geneTree.RData") #Save to load into RStudio

Module 0 is reserved for unassigned genes. The are other modules will be listed largest to smallest.

Load modules calculated from the adjacency matrix

dyMod_geneTree <- load(file = "2a-WGCNA/Output/dyMod_geneTree.RData")
dyMod_geneTree
## [1] "dynamicMods" "geneTree"

Plot the module assignment under the gene dendrogram

dynamicColors = labels2colors(dynamicMods) # Convert numeric labels into colors
table(dynamicColors)
## dynamicColors
##   antiquewhite2   antiquewhite4         bisque4           black            blue 
##              74             132             199             940            1697 
##           blue2           blue4      blueviolet           brown          brown2 
##              98              53              54            1513             101 
##          brown4           coral          coral1          coral2          coral3 
##             202              77             137             130              73 
##            cyan       darkgreen        darkgrey     darkmagenta  darkolivegreen 
##             571             413             410             308             317 
## darkolivegreen2 darkolivegreen4      darkorange     darkorange2         darkred 
##              54             102             409             205             425 
##   darkseagreen3   darkseagreen4   darkslateblue   darkturquoise      darkviolet 
##              78             142             197             412              95 
##        deeppink      firebrick3      firebrick4     floralwhite           green 
##              52              55             105             207            1178 
##     greenyellow            grey          grey60        honeydew       honeydew1 
##             708              29             456              79             144 
##      indianred3      indianred4           ivory  lavenderblush2  lavenderblush3 
##              56             107             211              83             148 
##      lightblue4      lightcoral       lightcyan      lightcyan1      lightgreen 
##              56             110             503             224             452 
##      lightpink3      lightpink4  lightslateblue  lightsteelblue lightsteelblue1 
##              83             158              59             113             247 
##     lightyellow         magenta        magenta4          maroon    mediumorchid 
##             452             837              84             160             130 
##   mediumpurple1   mediumpurple2   mediumpurple3   mediumpurple4    midnightblue 
##              60             114             250              72             554 
##    navajowhite1    navajowhite2          orange      orangered1      orangered3 
##              84             165             409              60             115 
##      orangered4   paleturquoise  palevioletred2  palevioletred3            pink 
##             252             318              86             167             922 
##           pink4            plum           plum1           plum2           plum3 
##              63             116             265             188              94 
##           plum4          purple             red       royalblue     saddlebrown 
##              52             747            1101             439             368 
##          salmon         salmon1         salmon2         salmon4         sienna3 
##             634              34              89             176             292 
##         sienna4         skyblue        skyblue1        skyblue2        skyblue3 
##              67             389             118             128             273 
##        skyblue4       steelblue             tan            tan4         thistle 
##              72             343             666              44              91 
##        thistle1        thistle2        thistle3        thistle4       turquoise 
##             179             183              92              48            2521 
##          violet           white          yellow         yellow3         yellow4 
##             317             393            1188              69             120 
##     yellowgreen 
##             281
#pdf(file="Output/RNAseq/dissTOMColorClustering.pdf", width=20, height=20)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05, main = "Gene dendrogram and module colors")

#dev.off()

Merge modules whose expression profiles are very similar or choose not to merge

Plot module similarity based on eigengene value

#Calculate eigengenes
MEList = moduleEigengenes(datExpr, colors = dynamicColors, softPower = 23)
MEs = MEList$eigengenes

#Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs)

#Cluster again and plot the results
METree = flashClust(as.dist(MEDiss), method = "average")

#pdf(file="Output/RNAseq/eigengeneClustering1.pdf", width = 20)
plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")

#dev.off()

Merge modules with >85% eigengene similarity. Most studies use somewhere between 80-90% similarity. It looks like most of our modules are highly related so I will use 85% similarity as my merging threshold.

MEDissThres= 0.15 #merge modules that are 85% similar

#pdf(file="Output/RNAseq/eigengeneClustering2.pdf", width = 20)
plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")
abline(h=MEDissThres, col="red")

#dev.off()

merge= mergeCloseModules(datExpr, dynamicColors, cutHeight= MEDissThres, verbose =3)
##  mergeCloseModules: Merging modules whose distance is less than 0.15
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 111 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 45 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 35 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 34 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 34 module eigengenes in given set.
mergedColors= merge$colors
mergedMEs= merge$newMEs

#pdf(file="Output/RNAseq/mergedClusters.pdf", width=20, height=20)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels= FALSE, hang=0.03, addGuide= TRUE, guideHang=0.05)

#dev.off()

Save new colors

moduleColors = mergedColors # Rename to moduleColors
colorOrder = c("grey", standardColors(50)); # Construct numerical labels corresponding to the colors
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;
ncol(MEs) #How many modules do we have now?
## [1] 34

Instead of 105 modules, we are now working with 34, a much more reasonable number.

Plot new tree

#Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs)
#Cluster again and plot the results
#pdf(file="Output/RNAseq/eigengeneClustering3.pdf")
METree = flashClust(as.dist(MEDiss), method = "average")
MEtreePlot = plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")

#dev.off()

Relating modules to developmental stage

Quantifying module–trait associations

Prepare trait data. Data has to be numeric, so I will substitute time_points and type for numeric values

allTraits <- names(treatmentinfo_dev$time_point)
allTraits$Unfertilized_egg <- c(1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
allTraits$Fertilized_egg <- c(0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
allTraits$Cleavage <- c(0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
allTraits$Prawn_chip <- c(0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0)
allTraits$Early_gastrula <- c(0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0)
allTraits$Mid_gastrula <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0)
allTraits$Late_gastrula <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0)
allTraits$Planula <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0)
allTraits$Adult <- c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1)
datTraits <- as.data.frame(allTraits)
dim(datTraits)
## [1] 24  9
rownames(datTraits) <- treatmentinfo_dev$sample_id
print(datTraits)
##       Unfertilized_egg Fertilized_egg Cleavage Prawn_chip Early_gastrula
## X119                 1              0        0          0              0
## X120                 1              0        0          0              0
## X121                 1              0        0          0              0
## X127                 0              1        0          0              0
## X132                 0              1        0          0              0
## X153                 0              0        1          0              0
## X154                 0              0        1          0              0
## X159                 0              0        1          0              0
## X162                 0              0        0          1              0
## X163                 0              0        0          1              0
## X167                 0              0        0          1              0
## X179                 0              0        0          0              1
## X180                 0              0        0          0              1
## X184                 0              0        0          0              1
## X212                 0              0        0          0              0
## X215                 0              0        0          0              0
## X218                 0              0        0          0              0
## X221                 0              0        0          0              0
## X359                 0              0        0          0              0
## X361                 0              0        0          0              0
## X375                 0              0        0          0              0
## X1101                0              0        0          0              0
## X1548                0              0        0          0              0
## X1628                0              0        0          0              0
##       Mid_gastrula Late_gastrula Planula Adult
## X119             0             0       0     0
## X120             0             0       0     0
## X121             0             0       0     0
## X127             0             0       0     0
## X132             0             0       0     0
## X153             0             0       0     0
## X154             0             0       0     0
## X159             0             0       0     0
## X162             0             0       0     0
## X163             0             0       0     0
## X167             0             0       0     0
## X179             0             0       0     0
## X180             0             0       0     0
## X184             0             0       0     0
## X212             1             0       0     0
## X215             1             0       0     0
## X218             0             1       0     0
## X221             0             1       0     0
## X359             0             0       1     0
## X361             0             0       1     0
## X375             0             0       1     0
## X1101            0             0       0     1
## X1548            0             0       0     1
## X1628            0             0       0     1

Define numbers of genes and samples

nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

Recalculate MEs with color labels

MEs0 = moduleEigengenes(datExpr, moduleColors,softPower=23)$eigengenes
MEs = orderMEs(MEs0)
names(MEs)
##  [1] "MEcoral1"         "MElavenderblush2" "MElightslateblue" "MEmagenta4"      
##  [5] "MEmediumpurple3"  "MEantiquewhite2"  "MEantiquewhite4"  "MEthistle"       
##  [9] "MEhoneydew1"      "MEmidnightblue"   "MEindianred3"     "MEblue2"         
## [13] "MEplum3"          "MEblue4"          "MEskyblue1"       "MEbrown2"        
## [17] "MEcoral"          "MEdarkseagreen4"  "MEdarkslateblue"  "MEplum4"         
## [21] "MEdarkmagenta"    "MEviolet"         "MEnavajowhite1"   "MEblue"          
## [25] "MEcyan"           "MEblueviolet"     "MEivory"          "MEmediumpurple1" 
## [29] "MEsalmon"         "MEsienna3"        "MEthistle4"       "MElightsteelblue"
## [33] "MEsalmon4"        "MEgrey"

Correlations of traits with eigengenes

moduleTraitCor = cor(MEs, datTraits, use = "p");
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples);
Colors=sub("ME","",names(MEs))

moduleTraitTree = hclust(dist(t(moduleTraitCor)), method = "average");
plot(moduleTraitTree, main = "Life stage clustering based on module-trait correlation", sub="", xlab="", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)

Correlations of genes with eigengenes

moduleGeneCor=cor(MEs,datExpr)
moduleGenePvalue = corPvalueStudent(moduleGeneCor, nSamples);

Plot module-trait associations

Represent module trait correlations as a heatmap

textMatrix = paste(signif(moduleTraitCor, 2), "\n(",signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) = dim(moduleTraitCor)
head(textMatrix)
##      [,1]            [,2]            [,3]            [,4]            
## [1,] "0.55\n(0.005)" "0.6\n(0.002)"  "0.024\n(0.9)"  "-0.37\n(0.07)" 
## [2,] "0.31\n(0.1)"   "0.39\n(0.06)"  "0.00061\n(1)"  "-0.088\n(0.7)" 
## [3,] "0.38\n(0.07)"  "0.45\n(0.03)"  "-0.41\n(0.05)" "-0.59\n(0.002)"
## [4,] "0.11\n(0.6)"   "0.19\n(0.4)"   "-0.39\n(0.06)" "-0.32\n(0.1)"  
## [5,] "0.59\n(0.002)" "0.57\n(0.003)" "-0.058\n(0.8)" "-0.44\n(0.03)" 
## [6,] "0.55\n(0.005)" "0.42\n(0.04)"  "0.11\n(0.6)"   "-0.17\n(0.4)"  
##      [,5]             [,6]            [,7]            [,8]           
## [1,] "-0.084\n(0.7)"  "0.14\n(0.5)"   "-0.0093\n(1)"  "-0.34\n(0.1)" 
## [2,] "0.35\n(0.09)"   "0.31\n(0.1)"   "-0.032\n(0.9)" "-0.51\n(0.01)"
## [3,] "0.013\n(1)"     "0.37\n(0.07)"  "0.23\n(0.3)"   "-0.11\n(0.6)" 
## [4,] "0.53\n(0.008)"  "0.52\n(0.01)"  "0.16\n(0.4)"   "-0.23\n(0.3)" 
## [5,] "-0.46\n(0.02)"  "-0.12\n(0.6)"  "0.037\n(0.9)"  "0.00032\n(1)" 
## [6,] "-0.54\n(0.007)" "-0.45\n(0.03)" "-0.23\n(0.3)"  "0.038\n(0.9)" 
##      [,9]            
## [1,] "-0.39\n(0.06)" 
## [2,] "-0.62\n(0.001)"
## [3,] "-0.16\n(0.5)"  
## [4,] "-0.43\n(0.04)" 
## [5,] "-0.042\n(0.8)" 
## [6,] "0.23\n(0.3)"
labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits),  yLabels = names(MEs), ySymbols = names(MEs), cex.lab.y= 0.55, cex.lab.x= 0.55, colors = blueWhiteRed(50), textMatrix = textMatrix, setStdMargins = TRUE, cex.text = 0.25, textAdj = , zlim = c(-1,1), main = paste("Module-trait relationships"))

#pdf(file="Output/RNAseq/Module-trait-relationships.pdf")
labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits),  yLabels = names(MEs), ySymbols = names(MEs), cex.lab.y= 0.55, cex.lab.x= 0.55, colors = blueWhiteRed(50), textMatrix = textMatrix, setStdMargins = TRUE, cex.text = 0.25, textAdj = , zlim = c(-1,1), main = paste("Module-trait relationships"))

#dev.off()

Attempt at complexHeatmap

#bold sig p-values
#dendrogram with WGCNA MEtree cut-off
#colored y-axis

#Create list of pvalues for eigengene correlation with specific life stages
heatmappval <- signif(moduleTraitPvalue, 1)

#Make list of heatmap row colors
htmap.colors <- names(MEs)
htmap.colors <- gsub("ME", "", htmap.colors)

pdf(file = "2a-WGCNA/Output/Fig3-Module-trait-relationship-heatmap.pdf", height = 11.5, width = 8)
ht=Heatmap(moduleTraitCor, name = "Eigengene", column_title = "Module-Trait Eigengene Correlation", 
        col = blueWhiteRed(50), 
        row_names_side = "left", row_dend_side = "left",
        width = unit(4, "in"), height = unit(8.5, "in"), 
        column_order = 1:9, column_dend_reorder = FALSE, cluster_columns = hclust(dist(t(moduleTraitCor)), method = "average"), column_split = 4, column_dend_height = unit(0.5, "in"),
        cluster_rows = METree, row_split = 9, row_gap = unit(2.5, "mm"), border = TRUE,
        cell_fun = function(j, i, x, y, w, h, col) {
        if(heatmappval[i, j] <= 0.05) {
            grid.text(sprintf("%s", heatmappval[i, j]), x, y, gp = gpar(fontsize = 8, fontface = "bold"))
        }
        else {
            grid.text(sprintf("%s", heatmappval[i, j]), x, y, gp = gpar(fontsize = 8, fontface = "plain"))
        }},
        column_names_gp =  gpar(fontsize = 10),
row_names_gp = gpar(fontsize = 10, alpha = 0.75, border = TRUE, fill = htmap.colors))
draw(ht)
dev.off()
## quartz_off_screen 
##                 2

Create dataframe that associates module colors with clusters based on the above heatmap and the MEtree dendrogram.

MEcluster1 <- data.frame(moduleColor = c("grey"), moduleCluster = c(1))
MEcluster2 <- data.frame(moduleColor = c("coral1", "lavenderblush2", "lightslateblue", "magenta4"), moduleCluster = c(2))
MEcluster3 <- data.frame(moduleColor = c("mediumpurple3", "antiquewhite2", "antiquewhite4", "thistle", "honeydew1", "midnightblue"), moduleCluster = c(3))
MEcluster4 <- data.frame(moduleColor = c("indianred3", "blue2", "plum3", "blue4", "skyblue1",  "brown2", "coral"), moduleCluster = c(4))
MEcluster5 <- data.frame(moduleColor = c("darkseagreen4", "darkslateblue", "plum4"), moduleCluster = c(5))
MEcluster6 <- data.frame(moduleColor = c("darkmagenta", "violet"), moduleCluster = c(6))
MEcluster7 <- data.frame(moduleColor = c("navajowhite1",  "blue", "cyan", "blueviolet", "ivory"), moduleCluster = c(7))
MEcluster8 <- data.frame(moduleColor = c("mediumpurple1", "salmon", "sienna3"), moduleCluster = c(8))
MEcluster9 <- data.frame(moduleColor = c("thistle4", "lightsteelblue", "salmon4"), moduleCluster = c(9))

moduleCluster = bind_rows(MEcluster1, MEcluster2, MEcluster3, MEcluster4, MEcluster5, MEcluster6, MEcluster7, MEcluster8, MEcluster9)
head(moduleCluster)
##      moduleColor moduleCluster
## 1           grey             1
## 2         coral1             2
## 3 lavenderblush2             2
## 4 lightslateblue             2
## 5       magenta4             2
## 6  mediumpurple3             3

View module eigengene data and make dataframe for Strader plots.

head(MEs)
##        MEcoral1 MElavenderblush2 MElightslateblue  MEmagenta4 MEmediumpurple3
## X119 0.24848684       0.13414942        0.1577409  0.04115021     0.280668704
## X120 0.36242665       0.18118856        0.2451331  0.04327770     0.371429910
## X121 0.28756239       0.18311741        0.2130381  0.09911997     0.306056568
## X127 0.38720460       0.26627027        0.2949086  0.10902656     0.374482545
## X132 0.41996939       0.26166151        0.3191636  0.14173261     0.403803615
## X153 0.06467411       0.03716312       -0.1580865 -0.20065179     0.007230752
##      MEantiquewhite2 MEantiquewhite4  MEthistle MEhoneydew1 MEmidnightblue
## X119      0.31448429      0.20508202 0.21178628   0.2389158      0.3301335
## X120      0.29985667      0.24817589 0.22040504   0.2544884      0.3523600
## X121      0.27763872      0.19348435 0.21868135   0.2274256      0.3128549
## X127      0.27555709      0.22669706 0.22723206   0.2433387      0.3169200
## X132      0.29239293      0.26703000 0.27389217   0.2633572      0.3580470
## X153      0.06375486     -0.06597227 0.09136637   0.2091421      0.1676654
##      MEindianred3     MEblue2     MEplum3     MEblue4   MEskyblue1   MEbrown2
## X119  -0.10795196 -0.09607899 -0.08914612  0.08736061 -0.098956882 -0.1439346
## X120  -0.09098988 -0.11173416 -0.09853049  0.09639855 -0.093984954 -0.1468603
## X121  -0.08127299 -0.13614835 -0.07669755  0.15704270 -0.072462780 -0.1388093
## X127  -0.12082756 -0.17335359 -0.09601977  0.07717422 -0.066559661 -0.1688178
## X132  -0.09135647 -0.17830304 -0.10395110  0.12465531 -0.059198242 -0.1485724
## X153  -0.15299643  0.26728478  0.17732256 -0.04426336 -0.009120716 -0.1521950
##         MEcoral MEdarkseagreen4 MEdarkslateblue    MEplum4 MEdarkmagenta
## X119 -0.2588041      -0.2346895      -0.2113772 -0.1702458    -0.3123438
## X120 -0.2659300      -0.2434984      -0.2192339 -0.2085663    -0.3305167
## X121 -0.2708921      -0.2386853      -0.2150389 -0.1869669    -0.3524672
## X127 -0.3185435      -0.2678420      -0.2340784 -0.1984467    -0.4277561
## X132 -0.3377147      -0.2495582      -0.2277209 -0.1774188    -0.4139293
## X153  0.0373090      -0.2579290      -0.2093208 -0.1810898    -0.1129797
##        MEviolet MEnavajowhite1     MEblue      MEcyan MEblueviolet     MEivory
## X119 -0.2711216    -0.05866413 -0.1699477 -0.09114489   -0.1004578 -0.09927486
## X120 -0.2591239    -0.08808336 -0.1819583 -0.09972959   -0.1019951 -0.10124562
## X121 -0.3128955    -0.07454440 -0.1803717 -0.10026336   -0.1063120 -0.09768833
## X127 -0.3760417    -0.10073408 -0.2070477 -0.11385931   -0.1111824 -0.10723292
## X132 -0.3629773    -0.09105489 -0.1917661 -0.10516027   -0.1084458 -0.10339576
## X153  0.1794391    -0.09664323 -0.2010656 -0.10364653   -0.1070528 -0.09879363
##      MEmediumpurple1   MEsalmon  MEsienna3 MEthistle4 MElightsteelblue
## X119      -0.1395402 -0.1093045 -0.1489548 -0.1126147       0.08772474
## X120      -0.1405495 -0.1128716 -0.1493759 -0.1258393       0.11839684
## X121      -0.1330423 -0.1146621 -0.1493579 -0.1054161       0.12489341
## X127      -0.1509663 -0.1237708 -0.1512457 -0.1144628       0.15374612
## X132      -0.1482514 -0.1131157 -0.1456306 -0.1180373       0.17132009
## X153      -0.1442459 -0.1171463 -0.1583067 -0.1469233      -0.23304269
##         MEsalmon4     MEgrey
## X119 -0.039561684  0.3608015
## X120 -0.045734044  0.1196569
## X121  0.004466899  0.2546754
## X127 -0.030711619 -0.6884973
## X132  0.017647507  0.0737040
## X153 -0.321431939 -0.1930869
names(MEs)
##  [1] "MEcoral1"         "MElavenderblush2" "MElightslateblue" "MEmagenta4"      
##  [5] "MEmediumpurple3"  "MEantiquewhite2"  "MEantiquewhite4"  "MEthistle"       
##  [9] "MEhoneydew1"      "MEmidnightblue"   "MEindianred3"     "MEblue2"         
## [13] "MEplum3"          "MEblue4"          "MEskyblue1"       "MEbrown2"        
## [17] "MEcoral"          "MEdarkseagreen4"  "MEdarkslateblue"  "MEplum4"         
## [21] "MEdarkmagenta"    "MEviolet"         "MEnavajowhite1"   "MEblue"          
## [25] "MEcyan"           "MEblueviolet"     "MEivory"          "MEmediumpurple1" 
## [29] "MEsalmon"         "MEsienna3"        "MEthistle4"       "MElightsteelblue"
## [33] "MEsalmon4"        "MEgrey"
Strader_MEs <- MEs
Strader_MEs$time_point <- treatmentinfo_dev$time_point
Strader_MEs$sample_id <- rownames(Strader_MEs)
head(Strader_MEs)
##        MEcoral1 MElavenderblush2 MElightslateblue  MEmagenta4 MEmediumpurple3
## X119 0.24848684       0.13414942        0.1577409  0.04115021     0.280668704
## X120 0.36242665       0.18118856        0.2451331  0.04327770     0.371429910
## X121 0.28756239       0.18311741        0.2130381  0.09911997     0.306056568
## X127 0.38720460       0.26627027        0.2949086  0.10902656     0.374482545
## X132 0.41996939       0.26166151        0.3191636  0.14173261     0.403803615
## X153 0.06467411       0.03716312       -0.1580865 -0.20065179     0.007230752
##      MEantiquewhite2 MEantiquewhite4  MEthistle MEhoneydew1 MEmidnightblue
## X119      0.31448429      0.20508202 0.21178628   0.2389158      0.3301335
## X120      0.29985667      0.24817589 0.22040504   0.2544884      0.3523600
## X121      0.27763872      0.19348435 0.21868135   0.2274256      0.3128549
## X127      0.27555709      0.22669706 0.22723206   0.2433387      0.3169200
## X132      0.29239293      0.26703000 0.27389217   0.2633572      0.3580470
## X153      0.06375486     -0.06597227 0.09136637   0.2091421      0.1676654
##      MEindianred3     MEblue2     MEplum3     MEblue4   MEskyblue1   MEbrown2
## X119  -0.10795196 -0.09607899 -0.08914612  0.08736061 -0.098956882 -0.1439346
## X120  -0.09098988 -0.11173416 -0.09853049  0.09639855 -0.093984954 -0.1468603
## X121  -0.08127299 -0.13614835 -0.07669755  0.15704270 -0.072462780 -0.1388093
## X127  -0.12082756 -0.17335359 -0.09601977  0.07717422 -0.066559661 -0.1688178
## X132  -0.09135647 -0.17830304 -0.10395110  0.12465531 -0.059198242 -0.1485724
## X153  -0.15299643  0.26728478  0.17732256 -0.04426336 -0.009120716 -0.1521950
##         MEcoral MEdarkseagreen4 MEdarkslateblue    MEplum4 MEdarkmagenta
## X119 -0.2588041      -0.2346895      -0.2113772 -0.1702458    -0.3123438
## X120 -0.2659300      -0.2434984      -0.2192339 -0.2085663    -0.3305167
## X121 -0.2708921      -0.2386853      -0.2150389 -0.1869669    -0.3524672
## X127 -0.3185435      -0.2678420      -0.2340784 -0.1984467    -0.4277561
## X132 -0.3377147      -0.2495582      -0.2277209 -0.1774188    -0.4139293
## X153  0.0373090      -0.2579290      -0.2093208 -0.1810898    -0.1129797
##        MEviolet MEnavajowhite1     MEblue      MEcyan MEblueviolet     MEivory
## X119 -0.2711216    -0.05866413 -0.1699477 -0.09114489   -0.1004578 -0.09927486
## X120 -0.2591239    -0.08808336 -0.1819583 -0.09972959   -0.1019951 -0.10124562
## X121 -0.3128955    -0.07454440 -0.1803717 -0.10026336   -0.1063120 -0.09768833
## X127 -0.3760417    -0.10073408 -0.2070477 -0.11385931   -0.1111824 -0.10723292
## X132 -0.3629773    -0.09105489 -0.1917661 -0.10516027   -0.1084458 -0.10339576
## X153  0.1794391    -0.09664323 -0.2010656 -0.10364653   -0.1070528 -0.09879363
##      MEmediumpurple1   MEsalmon  MEsienna3 MEthistle4 MElightsteelblue
## X119      -0.1395402 -0.1093045 -0.1489548 -0.1126147       0.08772474
## X120      -0.1405495 -0.1128716 -0.1493759 -0.1258393       0.11839684
## X121      -0.1330423 -0.1146621 -0.1493579 -0.1054161       0.12489341
## X127      -0.1509663 -0.1237708 -0.1512457 -0.1144628       0.15374612
## X132      -0.1482514 -0.1131157 -0.1456306 -0.1180373       0.17132009
## X153      -0.1442459 -0.1171463 -0.1583067 -0.1469233      -0.23304269
##         MEsalmon4     MEgrey       time_point sample_id
## X119 -0.039561684  0.3608015 Unfertilized_egg      X119
## X120 -0.045734044  0.1196569 Unfertilized_egg      X120
## X121  0.004466899  0.2546754 Unfertilized_egg      X121
## X127 -0.030711619 -0.6884973   Fertilized_egg      X127
## X132  0.017647507  0.0737040   Fertilized_egg      X132
## X153 -0.321431939 -0.1930869         Cleavage      X153

Calculate nine over-arching expression patterns using mean eigengene for each module in a cluster

Create a column to the Strader_MEs data frame containing life stage

Strader_MEs$time_point <- c("Unfertilized_egg", "Unfertilized_egg", "Unfertilized_egg", "Fertilized_egg", "Fertilized_egg", "Cleavage", "Cleavage", "Cleavage", "Prawn_chip", "Prawn_chip", "Prawn_chip", "Early_gastrula", "Early_gastrula", "Early_gastrula", "Mid_gastrula", "Mid_gastrula", "Late_gastrula", "Late_gastrula", "Planula", "Planula", "Planula", "Adult", "Adult", "Adult")

C2_Strader_MEs <- select(Strader_MEs, MEcoral1:MEmagenta4)
C2_Strader_MEs$Mean <- rowMeans(C2_Strader_MEs)
C3_Strader_MEs <- select(Strader_MEs, MEmediumpurple3:MEmidnightblue)
C3_Strader_MEs$Mean <- rowMeans(C3_Strader_MEs)
C4_Strader_MEs <- select(Strader_MEs, MEindianred3:MEcoral)
C4_Strader_MEs$Mean <- rowMeans(C4_Strader_MEs)
C5_Strader_MEs <- select(Strader_MEs, MEdarkseagreen4:MEplum4)
C5_Strader_MEs$Mean <- rowMeans(C5_Strader_MEs)
C6_Strader_MEs <- select(Strader_MEs, MEdarkmagenta:MEviolet)
C6_Strader_MEs$Mean <- rowMeans(C6_Strader_MEs)
C7_Strader_MEs <- select(Strader_MEs, MEnavajowhite1:MEivory)
C7_Strader_MEs$Mean <- rowMeans(C7_Strader_MEs)
C8_Strader_MEs <- select(Strader_MEs, MEmediumpurple1:MEsienna3)
C8_Strader_MEs$Mean <- rowMeans(C8_Strader_MEs)
C9_Strader_MEs <- select(Strader_MEs, MEthistle4:MEsalmon4)
C9_Strader_MEs$Mean <- rowMeans(C9_Strader_MEs)

expressionProfile_data <- as.data.frame(cbind(time_point = Strader_MEs$time_point, cluster1= Strader_MEs$MEgrey, cluster2 = C2_Strader_MEs$Mean, cluster3 = C3_Strader_MEs$Mean, cluster4 = C4_Strader_MEs$Mean, cluster5 = C5_Strader_MEs$Mean, cluster6 = C6_Strader_MEs$Mean, cluster7 = C7_Strader_MEs$Mean, cluster8 = C8_Strader_MEs$Mean, cluster9 = C9_Strader_MEs$Mean))
cols.num <- c(2:10)

expressionProfile_data[cols.num] <- sapply(expressionProfile_data[cols.num],as.numeric)
sapply(expressionProfile_data, class)
##  time_point    cluster1    cluster2    cluster3    cluster4    cluster5 
## "character"   "numeric"   "numeric"   "numeric"   "numeric"   "numeric" 
##    cluster6    cluster7    cluster8    cluster9 
##   "numeric"   "numeric"   "numeric"   "numeric"
dim(expressionProfile_data)
## [1] 24 10
head(expressionProfile_data)
##         time_point   cluster1    cluster2   cluster3    cluster4   cluster5
## 1 Unfertilized_egg  0.3608015  0.14538184 0.26351176 -0.10107315 -0.2054375
## 2 Unfertilized_egg  0.1196569  0.20800651 0.29111931 -0.10166160 -0.2237662
## 3 Unfertilized_egg  0.2546754  0.19570946 0.25602357 -0.08846290 -0.2135637
## 4   Fertilized_egg -0.6884973  0.26435252 0.27737125 -0.12384966 -0.2334557
## 5   Fertilized_egg  0.0737040  0.28563178 0.30975382 -0.11349153 -0.2182326
## 6         Cleavage -0.1930869 -0.06422527 0.07886454  0.01762012 -0.2161132
##      cluster6   cluster7   cluster8     cluster9
## 1 -0.29173270 -0.1038979 -0.1325999 -0.021483887
## 2 -0.29482026 -0.1146024 -0.1342657 -0.017725485
## 3 -0.33268135 -0.1118359 -0.1323541  0.007981407
## 4 -0.40189890 -0.1280113 -0.1419943  0.002857224
## 5 -0.38845331 -0.1199646 -0.1356659  0.023643448
## 6  0.03322969 -0.1214403 -0.1398997 -0.233799295

Set timepoint order for plotting

time_point_order = c("Unfertilized_egg", "Fertilized_egg", "Cleavage", "Prawn_chip", "Early_gastrula", "Mid_gastrula", "Late_gastrula", "Planula", "Adult") #Set time_point order

Plot mean module eigengene for each cluster

Cluster1Plot <- expressionProfile_data %>%
        select(time_point, cluster1) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster1)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ylab("Mean Module Eigenegene") +
  theme_bw() + 
  ggtitle("A) Cluster 1") +
  theme(axis.text.x=element_blank(), #set x-axis label size
        axis.title.x=element_blank(), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_text(size = 14), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 1", size = 6) +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey") #
Cluster2Plot <- expressionProfile_data %>%
        select(time_point, cluster2) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster2)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("B) Cluster 2") +
  theme_bw() + 
  theme(axis.text.x=element_blank(), #set x-axis label size
        axis.title.x=element_blank(), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_blank(), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 4", size = 6)  +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey") #
Cluster3Plot <- expressionProfile_data %>%
        select(time_point, cluster3) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster3)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("C) Cluster 3") +
  theme_bw() + 
 theme(axis.text.x=element_blank(), #set x-axis label size
        axis.title.x=element_blank(), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_blank(), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 6", size = 6)  +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")
Cluster4Plot <- expressionProfile_data %>%
        select(time_point, cluster4) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster4)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("D) Cluster 4") +
  ylab("Mean Module Eigenegene") +
  theme_bw() + 
 theme(axis.text.x=element_blank(), #set x-axis label size
        axis.title.x=element_blank(), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_text(size = 14), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 7", size = 6)  +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")
Cluster5Plot <- expressionProfile_data %>%
        select(time_point, cluster5) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster5)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("E) Cluster 5") +
  theme_bw() + 
 theme(axis.text.x=element_blank(), #set x-axis label size
        axis.title.x=element_blank(), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_blank(), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 3", size = 6) +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")
Cluster6Plot <- expressionProfile_data %>%
        select(time_point, cluster6) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster6)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("F) Cluster 6") +
  theme_bw() + 
  theme(axis.text.x=element_blank(), #set x-axis label size
        axis.title.x=element_blank(), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_blank(), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 2", size = 6) +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")
Cluster7Plot <- expressionProfile_data %>%
        select(time_point, cluster7) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster7)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("G) Cluster 7") +
  ylab("Mean Module Eigenegene") +
  theme_bw() + 
  theme(axis.text.x=element_text(angle = 90, vjust = 0.5, hjust=1, size = 14), #set x-axis label size
        axis.title.x=element_text(size = 14), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_text(size = 14), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 5", size = 6) +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")
Cluster8Plot <- expressionProfile_data %>%
        select(time_point, cluster8) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster8)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
        scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("H) Cluster 8") +
  theme_bw() + 
  theme(axis.text.x=element_text(angle = 90, vjust = 0.5, hjust=1, size = 14), #set x-axis label size
        axis.title.x=element_text(size = 14), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_blank(), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 3", size = 6) +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")
Cluster9Plot <- expressionProfile_data %>%
        select(time_point, cluster9) %>% 
  group_by(time_point) %>% 
  ggplot(aes(x=time_point, y=cluster9)) +
  geom_jitter(alpha = 0.5) +
  geom_boxplot(alpha=0) +
  scale_x_discrete(limits=time_point_order) +
  #ylim(-0.5,1) +
  ggtitle("I) Cluster 9") +
  xlab("Time Point") +
  theme_bw() + #Set background color
  theme(axis.text.x=element_text(angle = 90, vjust = 0.5, hjust=1, size = 14), #set x-axis label size
        axis.title.x=element_text(size = 14), #set x-axis title size
        axis.ticks.x=element_blank(), #No x-label ticks
        axis.title.y=element_blank(), #No y-axis title
        axis.text.y=element_text(size = 14), #set y-axis label size, #No y-title
        panel.border = element_blank(), # Set border
        panel.grid.major = element_blank(), #Set major gridlines
        panel.grid.minor = element_blank(), #Set minor gridlines
        axis.line = element_line(colour = "black"), #Set axes color
        plot.background=element_blank(),
        plot.title = element_text(size=22)) + #Set the plot background
  annotate("text", x = 1.2, y = 0.5, label = "n = 3", size = 6) +
  geom_hline(yintercept = 0, linetype="dashed", color = "grey")

Compile plots into 1 graph, aligning by y-axis and save

expressionProfiles <- cowplot::plot_grid(Cluster1Plot, Cluster2Plot, Cluster3Plot, Cluster4Plot, Cluster5Plot, Cluster6Plot, Cluster7Plot, Cluster8Plot, Cluster9Plot, align = "v", ncol = 3, nrow = 3)
ggsave("2a-WGCNA/Output/SFig1-RNAseq-expressionProfiles.png", expressionProfiles, height = 16, width = 21, units = "in")

Gene relationship to trait and important modules: Gene Significance and Module Membership

We quantify associations of individual genes with life stage by defining Gene Significance GS as the absolute value of the correlation between the gene and the time_point. For each module, we also define a quantitative measure of module membership MM as the correlation of the module eigengene and the gene expression profile.

Define variable weight containing the weight column of datTrait

time_point <- as.data.frame(c(1,1,1,
                              2,2,
                              3,3,3,
                              4,4,4,
                              5,5,5,
                              6,6,
                              7,7,
                              8,8,8,
                              9,9,9))
names(time_point) = "timepoint"
dim(time_point)
## [1] 24  1

Colors of the modules

modNames = substring(names(MEs), 3)

geneModuleMembership = as.data.frame(cor(datExpr, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples));

names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");

geneTraitSignificance = as.data.frame(cor(datExpr, time_point, use = "p"));
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples));

names(geneTraitSignificance) = paste("GS.", names(time_point), sep="");
names(GSPvalue) = paste("p.GS.", names(time_point), sep="");

Summary output of network analysis results

Make a dataframe that connects traits, genes, and gene annotation

Import annotation file.

Mcap.annot <- read_tsv( "0-BLAST-GO-KO/Output/200824_Mcap_Blast_GO_KO.tsv", col_names = TRUE) #biological annotation information
## Rows: 55229 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (10): gene_id, description, UniProtKB_entry, status, protein_names, gene...
## dbl  (4): length, eValue, bitscore, simMean
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tail(Mcap.annot)
## # A tibble: 6 × 14
##   gene_id description    length  eValue bitscore simMean UniProtKB_entry  status
##   <chr>   <chr>           <dbl>   <dbl>    <dbl>   <dbl> <chr>            <chr> 
## 1 g9992   XP_027059272.1    229 1.8e-36    164.       NA A0A3M6URP7_9CNID unrev…
## 2 g9994   XP_032226016.1     89 3  e-10     75.1      NA <NA>             <NA>  
## 3 g9995   XP_029180319.1    299 2.8e-41    180.       NA <NA>             <NA>  
## 4 g9996   KXJ20016.1        151 1.1e-12     84        NA <NA>             <NA>  
## 5 g9997   PFX27832.1        141 4.4e-36    161.       NA A0A2B4SH83_STYPI unrev…
## 6 g9999   XP_015774017.1    212 1  e-75    294.       NA <NA>             <NA>  
## # … with 6 more variables: protein_names <chr>, gene_names <chr>,
## #   organism <chr>, ko <chr>, GO_IDs <chr>, GO_terms <chr>
dim(Mcap.annot)
## [1] 55229    14

Match up genes in datExpr to annotation file

names(Mcap.annot)
##  [1] "gene_id"         "description"     "length"          "eValue"         
##  [5] "bitscore"        "simMean"         "UniProtKB_entry" "status"         
##  [9] "protein_names"   "gene_names"      "organism"        "ko"             
## [13] "GO_IDs"          "GO_terms"
probes = names(datExpr)
probes2annot = match(probes, Mcap.annot$gene_id)

# The following is the number of probes without annotation... Should return 0.
sum(is.na(probes2annot))
## [1] 2105

Create the starting data frame

geneInfo0 = data.frame(gene_id = probes,
Accession = Mcap.annot$description[probes2annot],
Bitscore = Mcap.annot$bitscore[probes2annot],
eValue = Mcap.annot$eValue[probes2annot],
Description = Mcap.annot$protein_names[probes2annot],
KEGG = Mcap.annot$ko[probes2annot],
Annotation.GO.ID = Mcap.annot$GO_IDs[probes2annot],
Annotation.GO.Term = Mcap.annot$GO_terms[probes2annot],
moduleColor = moduleColors,
geneTraitSignificance,
GSPvalue)

Order modules by their significance for time_point

modOrder = order(-abs(cor(MEs, time_point, use = "p")))

Add module membership information in the chosen order

for (mod in 1:ncol(geneModuleMembership))
{
oldNames = names(geneInfo0)
geneInfo0 = data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]],
MMPvalue[, modOrder[mod]]);
names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]], sep=""),
paste("p.MM.", modNames[modOrder[mod]], sep=""))
}

Order the genes in the geneInfo variable first by module color, then by geneTraitSignificance

geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$GS.timepoint));
geneInfo = geneInfo0[geneOrder, ]
head(geneInfo)
##                               gene_id      Accession Bitscore   eValue
## g45338                         g45338 XP_029205145.1   1048.5 3.5e-302
## adi2mcaRNA23838_R0 adi2mcaRNA23838_R0     EDO45783.1    200.7  2.4e-47
## g52470                         g52470 XP_029184981.1   2174.8  0.0e+00
## g47993                         g47993 XP_029198885.1   1842.8  0.0e+00
## g29630                         g29630 XP_029179365.1    337.8  5.1e-89
## g47496                         g47496 XP_029189624.1   2915.2  0.0e+00
##                                     Description KEGG
## g45338                                     <NA> <NA>
## adi2mcaRNA23838_R0 Predicted protein (Fragment) <NA>
## g52470                                     <NA> <NA>
## g47993                                     <NA> <NA>
## g29630                                     <NA> <NA>
## g47496                                     <NA> <NA>
##                                                     Annotation.GO.ID
## g45338                                                 GO:0046872;NA
## adi2mcaRNA23838_R0                                             NA;NA
## g52470                                                 GO:0005085;NA
## g47993                                                 GO:0030705;NA
## g29630                                                 GO:0035556;NA
## g47496             GO:0004672; GO:0005515; GO:0005524; GO:0006468;NA
##                                                                                   Annotation.GO.Term
## g45338                                                                          metal ion binding;NA
## adi2mcaRNA23838_R0                                                                             NA;NA
## g52470                                                 guanyl-nucleotide exchange factor activity;NA
## g47993                                             cytoskeleton-dependent intracellular transport;NA
## g29630                                                          intracellular signal transduction;NA
## g47496             protein kinase activity; protein binding; ATP binding; protein phosphorylation;NA
##                      moduleColor GS.timepoint p.GS.timepoint MM.honeydew1
## g45338             antiquewhite2   -0.6782481   0.0002699569    0.6493547
## adi2mcaRNA23838_R0 antiquewhite2   -0.6497504   0.0005897389    0.6349968
## g52470             antiquewhite2   -0.6471519   0.0006308163    0.6602197
## g47993             antiquewhite2   -0.6299791   0.0009696937    0.6074457
## g29630             antiquewhite2   -0.6146028   0.0013956627    0.5963822
## g47496             antiquewhite2   -0.6137076   0.0014247499    0.6324057
##                    p.MM.honeydew1    MM.blue  p.MM.blue MM.midnightblue
## g45338               0.0005958406 -0.4294572 0.03623168       0.9421482
## adi2mcaRNA23838_R0   0.0008574642 -0.4083695 0.04757572       0.9373262
## g52470               0.0004467424 -0.4174523 0.04239103       0.9471076
## g47993               0.0016431148 -0.3642718 0.08011919       0.9386874
## g29630               0.0020990586 -0.3568866 0.08690693       0.9161760
## g47496               0.0009139368 -0.4202687 0.04087702       0.9031936
##                    p.MM.midnightblue MM.darkseagreen4 p.MM.darkseagreen4
## g45338                  6.397458e-12       -0.8078179       1.814830e-06
## adi2mcaRNA23838_R0      1.508744e-11       -0.7893554       4.537968e-06
## g52470                  2.443203e-12       -0.8882157       6.939186e-09
## g47993                  1.192598e-11       -0.8296250       5.376382e-07
## g29630                  3.343902e-10       -0.7923920       3.927583e-06
## g47496                  1.532036e-09       -0.8640011       5.334823e-08
##                    MM.darkmagenta p.MM.darkmagenta MM.lavenderblush2
## g45338                 -0.8706107     3.184625e-08         0.2630509
## adi2mcaRNA23838_R0     -0.8912570     5.198605e-09         0.2678851
## g52470                 -0.7764754     8.168592e-06         0.1090637
## g47993                 -0.8013303     2.531398e-06         0.1154398
## g29630                 -0.8058690     2.008223e-06         0.1561628
## g47496                 -0.7655610     1.305685e-05         0.1484546
##                    p.MM.lavenderblush2 MM.coral1  p.MM.coral1    MM.blue4
## g45338                       0.2142734 0.6971211 1.533834e-04  0.10633882
## adi2mcaRNA23838_R0           0.2056601 0.7300742 5.130046e-05  0.04140449
## g52470                       0.6119475 0.5426716 6.145647e-03  0.03069839
## g47993                       0.5911653 0.5914564 2.334346e-03 -0.03771582
## g29630                       0.4661934 0.6156474 1.362363e-03  0.03178544
## g47496                       0.4887473 0.5505595 5.306410e-03  0.07183075
##                    p.MM.blue4     MM.cyan p.MM.cyan MM.mediumpurple1
## g45338              0.6209195 -0.12895436 0.5481491       -0.3964102
## adi2mcaRNA23838_R0  0.8476691 -0.07731269 0.7195411       -0.4365595
## g52470              0.8867676 -0.13828869 0.5193063       -0.3655879
## g47993              0.8611054 -0.11597483 0.5894353       -0.3772177
## g29630              0.8827845 -0.09585291 0.6559283       -0.3887748
## g47496              0.7387244  0.03082643 0.8862983       -0.5004311
##                    p.MM.mediumpurple1     MM.ivory p.MM.ivory MM.sienna3
## g45338                     0.05514308 -0.113280829  0.5981684 -0.4687217
## adi2mcaRNA23838_R0         0.03293603 -0.104790949  0.6260397 -0.4898122
## g52470                     0.07895285 -0.160976283  0.4523870 -0.4498707
## g47993                     0.06919686 -0.117135457  0.5856898 -0.4025608
## g29630                     0.06044149  0.008733315  0.9676937 -0.4328112
## g47496                     0.01275586 -0.054805842  0.7992248 -0.6042434
##                    p.MM.sienna3 MM.blueviolet p.MM.blueviolet  MM.salmon
## g45338              0.020869572   -0.20301049       0.3413965 -0.2734699
## adi2mcaRNA23838_R0  0.015117381   -0.18560630       0.3852184 -0.2843900
## g52470              0.027403905   -0.22221331       0.2966593 -0.1829908
## g47993              0.051142757   -0.16454144       0.4423015 -0.1507126
## g29630              0.034644225   -0.09618013       0.6548246 -0.1895533
## g47496              0.001765421   -0.12730728       0.5533137 -0.4011101
##                    p.MM.salmon MM.salmon4 p.MM.salmon4 MM.navajowhite1
## g45338              0.19600455 -0.3304978  0.114711390    -0.060811939
## adi2mcaRNA23838_R0  0.17802778 -0.3143224  0.134687870    -0.059550190
## g52470              0.39206858 -0.5295940  0.007780626    -0.110626858
## g47993              0.48208436 -0.4220670  0.039932840    -0.099572858
## g29630              0.37501103 -0.3616268  0.082502509    -0.010492606
## g47496              0.05206523 -0.4820504  0.017060547    -0.009676579
##                    p.MM.navajowhite1   MM.plum4   p.MM.plum4   MM.plum3
## g45338                     0.7777337 -0.6737341 3.072176e-04 -0.2367631
## adi2mcaRNA23838_R0         0.7822359 -0.6239314 1.121541e-03 -0.2629280
## g52470                     0.6068248 -0.7970098 3.138602e-06 -0.1578373
## g47993                     0.6434232 -0.7797177 7.070818e-06 -0.2600754
## g29630                     0.9611902 -0.7066813 1.133243e-04 -0.2713312
## g47496                     0.9642065 -0.6704965 3.366227e-04 -0.1614151
##                    p.MM.plum3 MM.thistle p.MM.thistle MM.mediumpurple3
## g45338              0.2653183  0.7562113 1.914040e-05        0.8492135
## adi2mcaRNA23838_R0  0.2144954  0.7823251 6.285031e-06        0.8717877
## g52470              0.4613659  0.7630099 1.451780e-05        0.7510299
## g47993              0.2196934  0.7307971 4.999580e-05        0.8146383
## g29630              0.1996649  0.7451958 2.941444e-05        0.8224768
## g47496              0.4511391  0.8587087 7.912307e-08        0.7133349
##                    p.MM.mediumpurple3 MM.darkslateblue p.MM.darkslateblue
## g45338                   1.544778e-07       -0.8302247       5.187198e-07
## adi2mcaRNA23838_R0       2.896548e-08       -0.8321546       4.618035e-07
## g52470                   2.349125e-05       -0.9313941       3.967272e-11
## g47993                   1.261756e-06       -0.9153719       3.700024e-10
## g29630                   8.155802e-07       -0.8631602       5.685785e-08
## g47496                   9.115891e-05       -0.8982969       2.575264e-09
##                    MM.antiquewhite2 p.MM.antiquewhite2 MM.lightsteelblue
## g45338                    0.9050470       1.249583e-09        0.18690572
## adi2mcaRNA23838_R0        0.9146301       4.058491e-10        0.20508470
## g52470                    0.9200527       2.023213e-10        0.03870560
## g47993                    0.9429430       5.514878e-12        0.14887957
## g29630                    0.9202911       1.960039e-10        0.18129713
## g47496                    0.8826421       1.153691e-08        0.02644146
##                    p.MM.lightsteelblue    MM.blue2 p.MM.blue2 MM.thistle4
## g45338                       0.3818407 -0.13236694  0.5375195  -0.5177210
## adi2mcaRNA23838_R0           0.3363803 -0.16645766  0.4369306  -0.5281305
## g52470                       0.8574962  0.06869654  0.7497614  -0.6550948
## g47993                       0.4874898 -0.05440680  0.8006579  -0.5703450
## g29630                       0.3965407 -0.10663083  0.6199555  -0.5699942
## g47496                       0.9023905  0.04028321  0.8517493  -0.5464551
##                    p.MM.thistle4 MM.skyblue1 p.MM.skyblue1  MM.violet
## g45338              0.0095640412  -0.4303346   0.035810946 -0.5756791
## adi2mcaRNA23838_R0  0.0079842346  -0.4393701   0.031699657 -0.5841647
## g52470              0.0005124587  -0.5169009   0.009698788 -0.3696349
## g47993              0.0036135365  -0.5423985   0.006176574 -0.4696388
## g29630              0.0036389894  -0.5059187   0.011659865 -0.5148645
## g47496              0.0057302122  -0.4406063   0.031167722 -0.3749093
##                    p.MM.violet MM.magenta4 p.MM.magenta4 MM.lightslateblue
## g45338             0.003244662  -0.1719583    0.42170884        0.32125015
## adi2mcaRNA23838_R0 0.002723619  -0.1562047    0.46607206        0.35450703
## g52470             0.075446724  -0.3972806    0.05456266        0.07481121
## g47993             0.020587182  -0.3413178    0.10261774        0.17856381
## g29630             0.010040174  -0.2695866    0.20268501        0.23968128
## g47496             0.071056028  -0.3560950    0.08765931        0.11753108
##                    p.MM.lightslateblue     MM.grey p.MM.grey   MM.coral
## g45338                      0.12584716 0.099303776 0.6443247 -0.7693546
## adi2mcaRNA23838_R0          0.08918333 0.008441108 0.9687742 -0.8038837
## g52470                      0.72827511 0.116321338 0.5883160 -0.6944650
## g47993                      0.40381813 0.149699262 0.4850689 -0.7656651
## g29630                      0.25929837 0.095708924 0.6564141 -0.7693911
## g47496                      0.58441542 0.053817207 0.8027764 -0.6742739
##                      p.MM.coral MM.indianred3 p.MM.indianred3 MM.antiquewhite4
## g45338             1.112444e-05    -0.2463613      0.24585153        0.6593282
## adi2mcaRNA23838_R0 2.223871e-06    -0.4009228      0.05218533        0.6857837
## g52470             1.665045e-04    -0.1921451      0.36839364        0.6132763
## g47993             1.300006e-05    -0.3022224      0.15118416        0.6777148
## g29630             1.110716e-05    -0.2388998      0.26090178        0.6889813
## g47496             3.025389e-04    -0.1540249      0.47239424        0.5890638
##                    p.MM.antiquewhite4  MM.brown2  p.MM.brown2
## g45338                   0.0004576192 -0.5631157 0.0041695023
## adi2mcaRNA23838_R0       0.0002164688 -0.6146645 0.0013936766
## g52470                   0.0014389508 -0.6111816 0.0015096460
## g47993                   0.0002741436 -0.6739891 0.0003050002
## g29630                   0.0001967260 -0.6061352 0.0016922620
## g47496                   0.0024564929 -0.5753800 0.0032644693

Add module cluster in geneInfo

geneInfo <- left_join(geneInfo, moduleCluster, by = "moduleColor")
dim(geneInfo)
## [1] 32772    80
head(geneInfo)
##              gene_id      Accession Bitscore   eValue
## 1             g45338 XP_029205145.1   1048.5 3.5e-302
## 2 adi2mcaRNA23838_R0     EDO45783.1    200.7  2.4e-47
## 3             g52470 XP_029184981.1   2174.8  0.0e+00
## 4             g47993 XP_029198885.1   1842.8  0.0e+00
## 5             g29630 XP_029179365.1    337.8  5.1e-89
## 6             g47496 XP_029189624.1   2915.2  0.0e+00
##                    Description KEGG
## 1                         <NA> <NA>
## 2 Predicted protein (Fragment) <NA>
## 3                         <NA> <NA>
## 4                         <NA> <NA>
## 5                         <NA> <NA>
## 6                         <NA> <NA>
##                                    Annotation.GO.ID
## 1                                     GO:0046872;NA
## 2                                             NA;NA
## 3                                     GO:0005085;NA
## 4                                     GO:0030705;NA
## 5                                     GO:0035556;NA
## 6 GO:0004672; GO:0005515; GO:0005524; GO:0006468;NA
##                                                                  Annotation.GO.Term
## 1                                                              metal ion binding;NA
## 2                                                                             NA;NA
## 3                                     guanyl-nucleotide exchange factor activity;NA
## 4                                 cytoskeleton-dependent intracellular transport;NA
## 5                                              intracellular signal transduction;NA
## 6 protein kinase activity; protein binding; ATP binding; protein phosphorylation;NA
##     moduleColor GS.timepoint p.GS.timepoint MM.honeydew1 p.MM.honeydew1
## 1 antiquewhite2   -0.6782481   0.0002699569    0.6493547   0.0005958406
## 2 antiquewhite2   -0.6497504   0.0005897389    0.6349968   0.0008574642
## 3 antiquewhite2   -0.6471519   0.0006308163    0.6602197   0.0004467424
## 4 antiquewhite2   -0.6299791   0.0009696937    0.6074457   0.0016431148
## 5 antiquewhite2   -0.6146028   0.0013956627    0.5963822   0.0020990586
## 6 antiquewhite2   -0.6137076   0.0014247499    0.6324057   0.0009139368
##      MM.blue  p.MM.blue MM.midnightblue p.MM.midnightblue MM.darkseagreen4
## 1 -0.4294572 0.03623168       0.9421482      6.397458e-12       -0.8078179
## 2 -0.4083695 0.04757572       0.9373262      1.508744e-11       -0.7893554
## 3 -0.4174523 0.04239103       0.9471076      2.443203e-12       -0.8882157
## 4 -0.3642718 0.08011919       0.9386874      1.192598e-11       -0.8296250
## 5 -0.3568866 0.08690693       0.9161760      3.343902e-10       -0.7923920
## 6 -0.4202687 0.04087702       0.9031936      1.532036e-09       -0.8640011
##   p.MM.darkseagreen4 MM.darkmagenta p.MM.darkmagenta MM.lavenderblush2
## 1       1.814830e-06     -0.8706107     3.184625e-08         0.2630509
## 2       4.537968e-06     -0.8912570     5.198605e-09         0.2678851
## 3       6.939186e-09     -0.7764754     8.168592e-06         0.1090637
## 4       5.376382e-07     -0.8013303     2.531398e-06         0.1154398
## 5       3.927583e-06     -0.8058690     2.008223e-06         0.1561628
## 6       5.334823e-08     -0.7655610     1.305685e-05         0.1484546
##   p.MM.lavenderblush2 MM.coral1  p.MM.coral1    MM.blue4 p.MM.blue4     MM.cyan
## 1           0.2142734 0.6971211 1.533834e-04  0.10633882  0.6209195 -0.12895436
## 2           0.2056601 0.7300742 5.130046e-05  0.04140449  0.8476691 -0.07731269
## 3           0.6119475 0.5426716 6.145647e-03  0.03069839  0.8867676 -0.13828869
## 4           0.5911653 0.5914564 2.334346e-03 -0.03771582  0.8611054 -0.11597483
## 5           0.4661934 0.6156474 1.362363e-03  0.03178544  0.8827845 -0.09585291
## 6           0.4887473 0.5505595 5.306410e-03  0.07183075  0.7387244  0.03082643
##   p.MM.cyan MM.mediumpurple1 p.MM.mediumpurple1     MM.ivory p.MM.ivory
## 1 0.5481491       -0.3964102         0.05514308 -0.113280829  0.5981684
## 2 0.7195411       -0.4365595         0.03293603 -0.104790949  0.6260397
## 3 0.5193063       -0.3655879         0.07895285 -0.160976283  0.4523870
## 4 0.5894353       -0.3772177         0.06919686 -0.117135457  0.5856898
## 5 0.6559283       -0.3887748         0.06044149  0.008733315  0.9676937
## 6 0.8862983       -0.5004311         0.01275586 -0.054805842  0.7992248
##   MM.sienna3 p.MM.sienna3 MM.blueviolet p.MM.blueviolet  MM.salmon p.MM.salmon
## 1 -0.4687217  0.020869572   -0.20301049       0.3413965 -0.2734699  0.19600455
## 2 -0.4898122  0.015117381   -0.18560630       0.3852184 -0.2843900  0.17802778
## 3 -0.4498707  0.027403905   -0.22221331       0.2966593 -0.1829908  0.39206858
## 4 -0.4025608  0.051142757   -0.16454144       0.4423015 -0.1507126  0.48208436
## 5 -0.4328112  0.034644225   -0.09618013       0.6548246 -0.1895533  0.37501103
## 6 -0.6042434  0.001765421   -0.12730728       0.5533137 -0.4011101  0.05206523
##   MM.salmon4 p.MM.salmon4 MM.navajowhite1 p.MM.navajowhite1   MM.plum4
## 1 -0.3304978  0.114711390    -0.060811939         0.7777337 -0.6737341
## 2 -0.3143224  0.134687870    -0.059550190         0.7822359 -0.6239314
## 3 -0.5295940  0.007780626    -0.110626858         0.6068248 -0.7970098
## 4 -0.4220670  0.039932840    -0.099572858         0.6434232 -0.7797177
## 5 -0.3616268  0.082502509    -0.010492606         0.9611902 -0.7066813
## 6 -0.4820504  0.017060547    -0.009676579         0.9642065 -0.6704965
##     p.MM.plum4   MM.plum3 p.MM.plum3 MM.thistle p.MM.thistle MM.mediumpurple3
## 1 3.072176e-04 -0.2367631  0.2653183  0.7562113 1.914040e-05        0.8492135
## 2 1.121541e-03 -0.2629280  0.2144954  0.7823251 6.285031e-06        0.8717877
## 3 3.138602e-06 -0.1578373  0.4613659  0.7630099 1.451780e-05        0.7510299
## 4 7.070818e-06 -0.2600754  0.2196934  0.7307971 4.999580e-05        0.8146383
## 5 1.133243e-04 -0.2713312  0.1996649  0.7451958 2.941444e-05        0.8224768
## 6 3.366227e-04 -0.1614151  0.4511391  0.8587087 7.912307e-08        0.7133349
##   p.MM.mediumpurple3 MM.darkslateblue p.MM.darkslateblue MM.antiquewhite2
## 1       1.544778e-07       -0.8302247       5.187198e-07        0.9050470
## 2       2.896548e-08       -0.8321546       4.618035e-07        0.9146301
## 3       2.349125e-05       -0.9313941       3.967272e-11        0.9200527
## 4       1.261756e-06       -0.9153719       3.700024e-10        0.9429430
## 5       8.155802e-07       -0.8631602       5.685785e-08        0.9202911
## 6       9.115891e-05       -0.8982969       2.575264e-09        0.8826421
##   p.MM.antiquewhite2 MM.lightsteelblue p.MM.lightsteelblue    MM.blue2
## 1       1.249583e-09        0.18690572           0.3818407 -0.13236694
## 2       4.058491e-10        0.20508470           0.3363803 -0.16645766
## 3       2.023213e-10        0.03870560           0.8574962  0.06869654
## 4       5.514878e-12        0.14887957           0.4874898 -0.05440680
## 5       1.960039e-10        0.18129713           0.3965407 -0.10663083
## 6       1.153691e-08        0.02644146           0.9023905  0.04028321
##   p.MM.blue2 MM.thistle4 p.MM.thistle4 MM.skyblue1 p.MM.skyblue1  MM.violet
## 1  0.5375195  -0.5177210  0.0095640412  -0.4303346   0.035810946 -0.5756791
## 2  0.4369306  -0.5281305  0.0079842346  -0.4393701   0.031699657 -0.5841647
## 3  0.7497614  -0.6550948  0.0005124587  -0.5169009   0.009698788 -0.3696349
## 4  0.8006579  -0.5703450  0.0036135365  -0.5423985   0.006176574 -0.4696388
## 5  0.6199555  -0.5699942  0.0036389894  -0.5059187   0.011659865 -0.5148645
## 6  0.8517493  -0.5464551  0.0057302122  -0.4406063   0.031167722 -0.3749093
##   p.MM.violet MM.magenta4 p.MM.magenta4 MM.lightslateblue p.MM.lightslateblue
## 1 0.003244662  -0.1719583    0.42170884        0.32125015          0.12584716
## 2 0.002723619  -0.1562047    0.46607206        0.35450703          0.08918333
## 3 0.075446724  -0.3972806    0.05456266        0.07481121          0.72827511
## 4 0.020587182  -0.3413178    0.10261774        0.17856381          0.40381813
## 5 0.010040174  -0.2695866    0.20268501        0.23968128          0.25929837
## 6 0.071056028  -0.3560950    0.08765931        0.11753108          0.58441542
##       MM.grey p.MM.grey   MM.coral   p.MM.coral MM.indianred3 p.MM.indianred3
## 1 0.099303776 0.6443247 -0.7693546 1.112444e-05    -0.2463613      0.24585153
## 2 0.008441108 0.9687742 -0.8038837 2.223871e-06    -0.4009228      0.05218533
## 3 0.116321338 0.5883160 -0.6944650 1.665045e-04    -0.1921451      0.36839364
## 4 0.149699262 0.4850689 -0.7656651 1.300006e-05    -0.3022224      0.15118416
## 5 0.095708924 0.6564141 -0.7693911 1.110716e-05    -0.2388998      0.26090178
## 6 0.053817207 0.8027764 -0.6742739 3.025389e-04    -0.1540249      0.47239424
##   MM.antiquewhite4 p.MM.antiquewhite4  MM.brown2  p.MM.brown2 moduleCluster
## 1        0.6593282       0.0004576192 -0.5631157 0.0041695023             3
## 2        0.6857837       0.0002164688 -0.6146645 0.0013936766             3
## 3        0.6132763       0.0014389508 -0.6111816 0.0015096460             3
## 4        0.6777148       0.0002741436 -0.6739891 0.0003050002             3
## 5        0.6889813       0.0001967260 -0.6061352 0.0016922620             3
## 6        0.5890638       0.0024564929 -0.5753800 0.0032644693             3

Save geneInfo as a CSV

head(geneInfo)
##              gene_id      Accession Bitscore   eValue
## 1             g45338 XP_029205145.1   1048.5 3.5e-302
## 2 adi2mcaRNA23838_R0     EDO45783.1    200.7  2.4e-47
## 3             g52470 XP_029184981.1   2174.8  0.0e+00
## 4             g47993 XP_029198885.1   1842.8  0.0e+00
## 5             g29630 XP_029179365.1    337.8  5.1e-89
## 6             g47496 XP_029189624.1   2915.2  0.0e+00
##                    Description KEGG
## 1                         <NA> <NA>
## 2 Predicted protein (Fragment) <NA>
## 3                         <NA> <NA>
## 4                         <NA> <NA>
## 5                         <NA> <NA>
## 6                         <NA> <NA>
##                                    Annotation.GO.ID
## 1                                     GO:0046872;NA
## 2                                             NA;NA
## 3                                     GO:0005085;NA
## 4                                     GO:0030705;NA
## 5                                     GO:0035556;NA
## 6 GO:0004672; GO:0005515; GO:0005524; GO:0006468;NA
##                                                                  Annotation.GO.Term
## 1                                                              metal ion binding;NA
## 2                                                                             NA;NA
## 3                                     guanyl-nucleotide exchange factor activity;NA
## 4                                 cytoskeleton-dependent intracellular transport;NA
## 5                                              intracellular signal transduction;NA
## 6 protein kinase activity; protein binding; ATP binding; protein phosphorylation;NA
##     moduleColor GS.timepoint p.GS.timepoint MM.honeydew1 p.MM.honeydew1
## 1 antiquewhite2   -0.6782481   0.0002699569    0.6493547   0.0005958406
## 2 antiquewhite2   -0.6497504   0.0005897389    0.6349968   0.0008574642
## 3 antiquewhite2   -0.6471519   0.0006308163    0.6602197   0.0004467424
## 4 antiquewhite2   -0.6299791   0.0009696937    0.6074457   0.0016431148
## 5 antiquewhite2   -0.6146028   0.0013956627    0.5963822   0.0020990586
## 6 antiquewhite2   -0.6137076   0.0014247499    0.6324057   0.0009139368
##      MM.blue  p.MM.blue MM.midnightblue p.MM.midnightblue MM.darkseagreen4
## 1 -0.4294572 0.03623168       0.9421482      6.397458e-12       -0.8078179
## 2 -0.4083695 0.04757572       0.9373262      1.508744e-11       -0.7893554
## 3 -0.4174523 0.04239103       0.9471076      2.443203e-12       -0.8882157
## 4 -0.3642718 0.08011919       0.9386874      1.192598e-11       -0.8296250
## 5 -0.3568866 0.08690693       0.9161760      3.343902e-10       -0.7923920
## 6 -0.4202687 0.04087702       0.9031936      1.532036e-09       -0.8640011
##   p.MM.darkseagreen4 MM.darkmagenta p.MM.darkmagenta MM.lavenderblush2
## 1       1.814830e-06     -0.8706107     3.184625e-08         0.2630509
## 2       4.537968e-06     -0.8912570     5.198605e-09         0.2678851
## 3       6.939186e-09     -0.7764754     8.168592e-06         0.1090637
## 4       5.376382e-07     -0.8013303     2.531398e-06         0.1154398
## 5       3.927583e-06     -0.8058690     2.008223e-06         0.1561628
## 6       5.334823e-08     -0.7655610     1.305685e-05         0.1484546
##   p.MM.lavenderblush2 MM.coral1  p.MM.coral1    MM.blue4 p.MM.blue4     MM.cyan
## 1           0.2142734 0.6971211 1.533834e-04  0.10633882  0.6209195 -0.12895436
## 2           0.2056601 0.7300742 5.130046e-05  0.04140449  0.8476691 -0.07731269
## 3           0.6119475 0.5426716 6.145647e-03  0.03069839  0.8867676 -0.13828869
## 4           0.5911653 0.5914564 2.334346e-03 -0.03771582  0.8611054 -0.11597483
## 5           0.4661934 0.6156474 1.362363e-03  0.03178544  0.8827845 -0.09585291
## 6           0.4887473 0.5505595 5.306410e-03  0.07183075  0.7387244  0.03082643
##   p.MM.cyan MM.mediumpurple1 p.MM.mediumpurple1     MM.ivory p.MM.ivory
## 1 0.5481491       -0.3964102         0.05514308 -0.113280829  0.5981684
## 2 0.7195411       -0.4365595         0.03293603 -0.104790949  0.6260397
## 3 0.5193063       -0.3655879         0.07895285 -0.160976283  0.4523870
## 4 0.5894353       -0.3772177         0.06919686 -0.117135457  0.5856898
## 5 0.6559283       -0.3887748         0.06044149  0.008733315  0.9676937
## 6 0.8862983       -0.5004311         0.01275586 -0.054805842  0.7992248
##   MM.sienna3 p.MM.sienna3 MM.blueviolet p.MM.blueviolet  MM.salmon p.MM.salmon
## 1 -0.4687217  0.020869572   -0.20301049       0.3413965 -0.2734699  0.19600455
## 2 -0.4898122  0.015117381   -0.18560630       0.3852184 -0.2843900  0.17802778
## 3 -0.4498707  0.027403905   -0.22221331       0.2966593 -0.1829908  0.39206858
## 4 -0.4025608  0.051142757   -0.16454144       0.4423015 -0.1507126  0.48208436
## 5 -0.4328112  0.034644225   -0.09618013       0.6548246 -0.1895533  0.37501103
## 6 -0.6042434  0.001765421   -0.12730728       0.5533137 -0.4011101  0.05206523
##   MM.salmon4 p.MM.salmon4 MM.navajowhite1 p.MM.navajowhite1   MM.plum4
## 1 -0.3304978  0.114711390    -0.060811939         0.7777337 -0.6737341
## 2 -0.3143224  0.134687870    -0.059550190         0.7822359 -0.6239314
## 3 -0.5295940  0.007780626    -0.110626858         0.6068248 -0.7970098
## 4 -0.4220670  0.039932840    -0.099572858         0.6434232 -0.7797177
## 5 -0.3616268  0.082502509    -0.010492606         0.9611902 -0.7066813
## 6 -0.4820504  0.017060547    -0.009676579         0.9642065 -0.6704965
##     p.MM.plum4   MM.plum3 p.MM.plum3 MM.thistle p.MM.thistle MM.mediumpurple3
## 1 3.072176e-04 -0.2367631  0.2653183  0.7562113 1.914040e-05        0.8492135
## 2 1.121541e-03 -0.2629280  0.2144954  0.7823251 6.285031e-06        0.8717877
## 3 3.138602e-06 -0.1578373  0.4613659  0.7630099 1.451780e-05        0.7510299
## 4 7.070818e-06 -0.2600754  0.2196934  0.7307971 4.999580e-05        0.8146383
## 5 1.133243e-04 -0.2713312  0.1996649  0.7451958 2.941444e-05        0.8224768
## 6 3.366227e-04 -0.1614151  0.4511391  0.8587087 7.912307e-08        0.7133349
##   p.MM.mediumpurple3 MM.darkslateblue p.MM.darkslateblue MM.antiquewhite2
## 1       1.544778e-07       -0.8302247       5.187198e-07        0.9050470
## 2       2.896548e-08       -0.8321546       4.618035e-07        0.9146301
## 3       2.349125e-05       -0.9313941       3.967272e-11        0.9200527
## 4       1.261756e-06       -0.9153719       3.700024e-10        0.9429430
## 5       8.155802e-07       -0.8631602       5.685785e-08        0.9202911
## 6       9.115891e-05       -0.8982969       2.575264e-09        0.8826421
##   p.MM.antiquewhite2 MM.lightsteelblue p.MM.lightsteelblue    MM.blue2
## 1       1.249583e-09        0.18690572           0.3818407 -0.13236694
## 2       4.058491e-10        0.20508470           0.3363803 -0.16645766
## 3       2.023213e-10        0.03870560           0.8574962  0.06869654
## 4       5.514878e-12        0.14887957           0.4874898 -0.05440680
## 5       1.960039e-10        0.18129713           0.3965407 -0.10663083
## 6       1.153691e-08        0.02644146           0.9023905  0.04028321
##   p.MM.blue2 MM.thistle4 p.MM.thistle4 MM.skyblue1 p.MM.skyblue1  MM.violet
## 1  0.5375195  -0.5177210  0.0095640412  -0.4303346   0.035810946 -0.5756791
## 2  0.4369306  -0.5281305  0.0079842346  -0.4393701   0.031699657 -0.5841647
## 3  0.7497614  -0.6550948  0.0005124587  -0.5169009   0.009698788 -0.3696349
## 4  0.8006579  -0.5703450  0.0036135365  -0.5423985   0.006176574 -0.4696388
## 5  0.6199555  -0.5699942  0.0036389894  -0.5059187   0.011659865 -0.5148645
## 6  0.8517493  -0.5464551  0.0057302122  -0.4406063   0.031167722 -0.3749093
##   p.MM.violet MM.magenta4 p.MM.magenta4 MM.lightslateblue p.MM.lightslateblue
## 1 0.003244662  -0.1719583    0.42170884        0.32125015          0.12584716
## 2 0.002723619  -0.1562047    0.46607206        0.35450703          0.08918333
## 3 0.075446724  -0.3972806    0.05456266        0.07481121          0.72827511
## 4 0.020587182  -0.3413178    0.10261774        0.17856381          0.40381813
## 5 0.010040174  -0.2695866    0.20268501        0.23968128          0.25929837
## 6 0.071056028  -0.3560950    0.08765931        0.11753108          0.58441542
##       MM.grey p.MM.grey   MM.coral   p.MM.coral MM.indianred3 p.MM.indianred3
## 1 0.099303776 0.6443247 -0.7693546 1.112444e-05    -0.2463613      0.24585153
## 2 0.008441108 0.9687742 -0.8038837 2.223871e-06    -0.4009228      0.05218533
## 3 0.116321338 0.5883160 -0.6944650 1.665045e-04    -0.1921451      0.36839364
## 4 0.149699262 0.4850689 -0.7656651 1.300006e-05    -0.3022224      0.15118416
## 5 0.095708924 0.6564141 -0.7693911 1.110716e-05    -0.2388998      0.26090178
## 6 0.053817207 0.8027764 -0.6742739 3.025389e-04    -0.1540249      0.47239424
##   MM.antiquewhite4 p.MM.antiquewhite4  MM.brown2  p.MM.brown2 moduleCluster
## 1        0.6593282       0.0004576192 -0.5631157 0.0041695023             3
## 2        0.6857837       0.0002164688 -0.6146645 0.0013936766             3
## 3        0.6132763       0.0014389508 -0.6111816 0.0015096460             3
## 4        0.6777148       0.0002741436 -0.6739891 0.0003050002             3
## 5        0.6889813       0.0001967260 -0.6061352 0.0016922620             3
## 6        0.5890638       0.0024564929 -0.5753800 0.0032644693             3
geneInfo$Annotation.GO.ID <- gsub(";NA", "", geneInfo$Annotation.GO.ID) #Remove NAs
geneInfo$Annotation.GO.ID <- gsub("NA", "", geneInfo$Annotation.GO.ID) #Remove NAs

write.csv(geneInfo, file = "2a-WGCNA/Output/geneInfo.csv")

Gene Ontology Analysis of each developmental phase: Maternal, Early ZGA, Late ZGA, Adult and other individual life stages

Obtain module color of all expressed genes (poverA = 0.85,5). Select all significantly-expressed maternal genes (p<0.05)

#Prepare dataframe
genes.GO <- as.data.frame(t(datExpr))
genes.GO <- cbind(gene_id = rownames(genes.GO), genes.GO)
rownames(genes.GO) <- NULL

#Maternal
genesMat.GO <- genes.GO[,c(1:6)]
geneColor <- geneInfo %>% select(gene_id, moduleColor) #Make a dataframe containing just gene_id and moduleColor
geneColor$gene_id <- as.factor(geneColor$gene_id) #Make factor for merge
genesMat.GO$gene_id <- as.factor(genesMat.GO$gene_id) #Make factor for merge
genesMat.GO <- merge(geneColor, genesMat.GO) #append module information
head(genesMat.GO)
##              gene_id moduleColor      X119      X120      X121      X127
## 1 adi2mcaRNA10013_R0   honeydew1 13.856733 13.718850 14.020625 13.874230
## 2 adi2mcaRNA10017_R1   honeydew1 10.130987 10.286848 10.131647  9.909535
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan 10.323999 10.436964 10.185860 10.305153
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991  8.873991  8.873991
##        X132
## 1 13.986782
## 2  9.869312
## 3  8.873991
## 4 10.566842
## 5  8.873991
## 6  8.873991
MatColors <- c("grey", "coral1", "lightslateblue", "mediumpurple3", "antiquewhite2", "antiquewhite4", "thistle", "honeydew1", "midnightblue")
genesMat.GO.sig <- filter(genesMat.GO, moduleColor%in%MatColors)
head(genesMat.GO.sig)
##              gene_id moduleColor     X119      X120      X121      X127
## 1 adi2mcaRNA10013_R0   honeydew1 13.85673 13.718850 14.020625 13.874230
## 2 adi2mcaRNA10017_R1   honeydew1 10.13099 10.286848 10.131647  9.909535
## 3  adi2mcaRNA1007_R1   honeydew1  9.04570  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10085_R0   honeydew1 11.76513 11.960402 11.848386 12.258040
## 5  adi2mcaRNA1009_R0   honeydew1 10.30531 10.284481 10.474349 10.607037
## 6 adi2mcaRNA10296_R0   honeydew1 12.71266 12.980859 13.229758 13.712136
##        X132
## 1 13.986782
## 2  9.869312
## 3  9.366546
## 4 12.073178
## 5 10.475260
## 6 13.580090
dim(genesMat.GO.sig)
## [1] 4175    7
#Unfertilized Unfertilized Eggs
genesUE.GO <- genes.GO[,c(1:4)]
geneColor <- geneInfo %>% select(gene_id, moduleColor) #Make a dataframe containing just gene_id and moduleColor
geneColor$gene_id <- as.factor(geneColor$gene_id) #Make factor for merge
genesUE.GO$gene_id <- as.factor(genesUE.GO$gene_id) #Make factor for merge
genesUE.GO <- merge(geneColor, genesUE.GO) #append module information
head(genesUE.GO)
##              gene_id moduleColor      X119      X120      X121
## 1 adi2mcaRNA10013_R0   honeydew1 13.856733 13.718850 14.020625
## 2 adi2mcaRNA10017_R1   honeydew1 10.130987 10.286848 10.131647
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan 10.323999 10.436964 10.185860
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991  8.873991
UEColors <- c("grey", "coral1", "mediumpurple3", "antiquewhite2", "antiquewhite4", "thistle", "honeydew1", "midnightblue")
genesUE.GO.sig <- filter(genesUE.GO, moduleColor%in%UEColors)
head(genesUE.GO.sig)
##              gene_id moduleColor     X119      X120      X121
## 1 adi2mcaRNA10013_R0   honeydew1 13.85673 13.718850 14.020625
## 2 adi2mcaRNA10017_R1   honeydew1 10.13099 10.286848 10.131647
## 3  adi2mcaRNA1007_R1   honeydew1  9.04570  8.873991  8.873991
## 4 adi2mcaRNA10085_R0   honeydew1 11.76513 11.960402 11.848386
## 5  adi2mcaRNA1009_R0   honeydew1 10.30531 10.284481 10.474349
## 6 adi2mcaRNA10296_R0   honeydew1 12.71266 12.980859 13.229758
dim(genesUE.GO.sig)
## [1] 4044    5
#Fertilized Eggs
genesFE.GO <- genes.GO[,c(1, 5:6)]
geneColor <- geneInfo %>% select(gene_id, moduleColor) #Make a dataframe containing just gene_id and moduleColor
geneColor$gene_id <- as.factor(geneColor$gene_id) #Make factor for merge
genesFE.GO$gene_id <- as.factor(genesFE.GO$gene_id) #Make factor for merge
genesFE.GO <- merge(geneColor, genesFE.GO) #append module information
head(genesFE.GO)
##              gene_id moduleColor      X127      X132
## 1 adi2mcaRNA10013_R0   honeydew1 13.874230 13.986782
## 2 adi2mcaRNA10017_R1   honeydew1  9.909535  9.869312
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan 10.305153 10.566842
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991
FEColors <- c("coral1", "lightslateblue", "mediumpurple3", "antiquewhite2", "midnightblue")
genesFE.GO.sig <- filter(genesFE.GO, moduleColor%in%FEColors)
head(genesFE.GO.sig)
##              gene_id   moduleColor     X127     X132
## 1 adi2mcaRNA10458_R1 mediumpurple3 10.76543 11.18311
## 2  adi2mcaRNA1049_R0  midnightblue 11.81835 11.81929
## 3 adi2mcaRNA10524_R0  midnightblue 10.21771 10.40150
## 4 adi2mcaRNA10614_R0  midnightblue 12.78106 12.77558
## 5 adi2mcaRNA10800_R0        coral1 11.86172 11.47727
## 6 adi2mcaRNA11052_R0        coral1 11.60275 11.77849
dim(genesFE.GO.sig)
## [1] 2035    4
#Early ZGA
genesZGA1.GO <- genes.GO[,c(1, 7:15)]
genesZGA1.GO$gene_id <- as.factor(genesZGA1.GO$gene_id) #Make factor for merge
genesZGA1.GO <- merge(geneColor, genesZGA1.GO) #append module info ZGA1ion
head(genesZGA1.GO)
##              gene_id moduleColor      X153      X154      X159      X162
## 1 adi2mcaRNA10013_R0   honeydew1 14.038510 14.281777 14.407036 13.730070
## 2 adi2mcaRNA10017_R1   honeydew1  9.780544  9.825871  9.793535 10.025731
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan 10.291471 10.108466 10.053651  9.690496
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991  8.873991  9.109010
##        X163      X167      X179      X180      X184
## 1 13.862217 13.757265 12.225041 11.888140 12.070319
## 2  9.985553  9.950188  9.759188  9.807519  9.833320
## 3  8.873991  8.873991  9.082857  8.873991  9.028027
## 4  9.691840  9.711188  9.777535  9.493523  9.783506
## 5  8.873991  8.873991  8.873991  8.873991  8.873991
## 6  8.873991  8.873991  8.873991  8.873991  8.873991
ZGA1Colors <- c("magenta4","indianred3", "blue2", "plum3", "blue4", "skyblue1", "brown2", "coral", "darkslateblue", "plum4", "violet")
genesZGA1.GO.sig <- filter(genesZGA1.GO, moduleColor%in%ZGA1Colors)
head(genesZGA1.GO.sig)
##              gene_id   moduleColor      X153      X154      X159      X162
## 1 adi2mcaRNA10081_R0         coral  9.913390 10.223128 10.294565 10.691543
## 2  adi2mcaRNA1019_R0        violet  9.883579  9.887090  9.977302 10.064821
## 3 adi2mcaRNA10224_R7    indianred3  8.873991  8.873991  8.873991  9.132508
## 4 adi2mcaRNA10301_R1 darkslateblue  8.873991  8.873991  8.873991  8.873991
## 5 adi2mcaRNA10301_R2 darkslateblue  8.873991  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10307_R0         plum3 11.006117 10.999390 10.786402 11.030755
##        X163      X167      X179      X180      X184
## 1 10.775424 10.755006 10.752304 10.808031 10.963879
## 2  9.995844  9.981283 10.167184 10.111774 10.098349
## 3  9.098930  9.039250  8.873991  8.873991  8.873991
## 4  9.048298  8.873991  9.229090  9.146984  9.196580
## 5  9.016341  9.014945  9.300853  9.126787  9.102333
## 6 11.021086 11.077857 11.062527 10.646149 10.938252
dim(genesZGA1.GO.sig)
## [1] 4407   11
#Cleavage
genesClvg.GO <- genes.GO[,c(1, 7:9)]
genesClvg.GO$gene_id <- as.factor(genesClvg.GO$gene_id) #Make factor for merge
genesClvg.GO <- merge(geneColor, genesClvg.GO) #append module info Clvgion
head(genesClvg.GO)
##              gene_id moduleColor      X153      X154      X159
## 1 adi2mcaRNA10013_R0   honeydew1 14.038510 14.281777 14.407036
## 2 adi2mcaRNA10017_R1   honeydew1  9.780544  9.825871  9.793535
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan 10.291471 10.108466 10.053651
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991  8.873991
ClvgColors <- c("blue2", "violet")
genesClvg.GO.sig <- filter(genesClvg.GO, moduleColor%in%ClvgColors)
head(genesClvg.GO.sig)
##              gene_id moduleColor      X153      X154      X159
## 1  adi2mcaRNA1019_R0      violet  9.883579  9.887090  9.977302
## 2 adi2mcaRNA10355_R0       blue2  9.571031  9.357560  9.799292
## 3 adi2mcaRNA10492_R0       blue2 10.587904 10.711193 10.521225
## 4 adi2mcaRNA10492_R1       blue2 10.231079 10.737733 10.359272
## 5 adi2mcaRNA10799_R1      violet  9.635006  9.546324  9.382469
## 6 adi2mcaRNA10878_R2       blue2  9.552449  9.587314  9.675300
dim(genesClvg.GO.sig)
## [1] 1577    5
#Prawn Chip
genesPC.GO <- genes.GO[,c(1, 10:12)]
genesPC.GO$gene_id <- as.factor(genesPC.GO$gene_id) #Make factor for merge
genesPC.GO <- merge(geneColor, genesPC.GO) #append module info PCion
head(genesPC.GO)
##              gene_id moduleColor      X162      X163      X167
## 1 adi2mcaRNA10013_R0   honeydew1 13.730070 13.862217 13.757265
## 2 adi2mcaRNA10017_R1   honeydew1 10.025731  9.985553  9.950188
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan  9.690496  9.691840  9.711188
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  9.109010  8.873991  8.873991
PCColors <- c("indianred3", "blue2", "plum3", "blue4", "coral", "violet")
genesPC.GO.sig <- filter(genesPC.GO, moduleColor%in%PCColors)
head(genesPC.GO.sig)
##              gene_id moduleColor      X162      X163      X167
## 1 adi2mcaRNA10081_R0       coral 10.691543 10.775424 10.755006
## 2  adi2mcaRNA1019_R0      violet 10.064821  9.995844  9.981283
## 3 adi2mcaRNA10224_R7  indianred3  9.132508  9.098930  9.039250
## 4 adi2mcaRNA10307_R0       plum3 11.030755 11.021086 11.077857
## 5 adi2mcaRNA10355_R0       blue2  9.580941  9.369972  9.537011
## 6 adi2mcaRNA10492_R0       blue2 10.475103 10.528184 10.464401
dim(genesPC.GO.sig)
## [1] 2638    5
#Early Gastrula
genesEG.GO <- genes.GO[,c(1, 13:15)]
genesEG.GO$gene_id <- as.factor(genesEG.GO$gene_id) #Make factor for merge
genesEG.GO <- merge(geneColor, genesEG.GO) #append module info EGion
head(genesEG.GO)
##              gene_id moduleColor      X179      X180      X184
## 1 adi2mcaRNA10013_R0   honeydew1 12.225041 11.888140 12.070319
## 2 adi2mcaRNA10017_R1   honeydew1  9.759188  9.807519  9.833320
## 3 adi2mcaRNA10018_R0        blue  9.082857  8.873991  9.028027
## 4 adi2mcaRNA10022_R0        cyan  9.777535  9.493523  9.783506
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991  8.873991
EGColors <- c("magenta4", "plum3", "blue4", "skyblue1", "brown2", "coral", "darkslateblue", "plum4")
genesEG.GO.sig <- filter(genesEG.GO, moduleColor%in%EGColors)
head(genesEG.GO.sig)
##              gene_id   moduleColor      X179      X180      X184
## 1 adi2mcaRNA10081_R0         coral 10.752304 10.808031 10.963879
## 2 adi2mcaRNA10301_R1 darkslateblue  9.229090  9.146984  9.196580
## 3 adi2mcaRNA10301_R2 darkslateblue  9.300853  9.126787  9.102333
## 4 adi2mcaRNA10307_R0         plum3 11.062527 10.646149 10.938252
## 5 adi2mcaRNA10557_R0         plum3  9.471962  9.571822  9.434587
## 6 adi2mcaRNA10579_R0         coral 12.282315 12.588345 12.248355
dim(genesEG.GO.sig)
## [1] 2685    5
#Late ZGA
genesZGA2.GO <- genes.GO[,c(1, 16:22)]
genesZGA2.GO$gene_id <- as.factor(genesZGA2.GO$gene_id) #Make factor for merge
genesZGA2.GO <- merge(geneColor, genesZGA2.GO) #append module info ZGA2ion
head(genesZGA2.GO)
##              gene_id moduleColor      X212      X215      X218      X221
## 1 adi2mcaRNA10013_R0   honeydew1 11.264607 11.262483 11.524604 11.469398
## 2 adi2mcaRNA10017_R1   honeydew1  9.527062  9.650037  9.458826  9.292660
## 3 adi2mcaRNA10018_R0        blue  8.962390  8.873991  8.873991  9.012454
## 4 adi2mcaRNA10022_R0        cyan  9.081160  9.329598  9.414579  9.470327
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  9.598113  9.426005 10.588162 10.566363
##        X359      X361      X375
## 1 11.856368 11.760712 11.971630
## 2  9.344819  9.354966  9.290794
## 3  9.270761  9.259076  9.271063
## 4 10.211142 10.159268 10.249013
## 5  9.513902  9.358264  9.601831
## 6 10.529897 10.637113 10.634318
ZGA2Colors <- c("magenta4", "skyblue1", "darkseagreen", "darkslateblue", "thistle4", "salmon4", "mediumpurple1", "sienna3", "salmon", "blue")
genesZGA2.GO.sig <- filter(genesZGA2.GO, moduleColor%in%ZGA2Colors)
head(genesZGA2.GO.sig)
##              gene_id moduleColor      X212      X215      X218      X221
## 1 adi2mcaRNA10018_R0        blue  8.962390  8.873991  8.873991  9.012454
## 2 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991  8.873991
## 3 adi2mcaRNA10043_R1     sienna3  9.598113  9.426005 10.588162 10.566363
## 4 adi2mcaRNA10073_R0        blue 12.018725 11.878120 12.174456 11.754813
## 5 adi2mcaRNA10211_R5        blue  9.027055  8.978927  9.037852  9.025658
## 6  adi2mcaRNA1022_R0        blue  9.845965  9.774315  9.960113 10.209558
##        X359      X361      X375
## 1  9.270761  9.259076  9.271063
## 2  9.513902  9.358264  9.601831
## 3 10.529897 10.637113 10.634318
## 4 12.558500 12.537344 12.715980
## 5  9.103547  9.062726  9.144205
## 6 10.352927 10.341156 10.639776
dim(genesZGA2.GO.sig)
## [1] 14953     9
#Mid Gastrula
genesMG.GO <- genes.GO[,c(1, 16:17)]
genesMG.GO$gene_id <- as.factor(genesMG.GO$gene_id) #Make factor for merge
genesMG.GO <- merge(geneColor, genesMG.GO) #append module info MGion
head(genesMG.GO)
##              gene_id moduleColor      X212      X215
## 1 adi2mcaRNA10013_R0   honeydew1 11.264607 11.262483
## 2 adi2mcaRNA10017_R1   honeydew1  9.527062  9.650037
## 3 adi2mcaRNA10018_R0        blue  8.962390  8.873991
## 4 adi2mcaRNA10022_R0        cyan  9.081160  9.329598
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  9.598113  9.426005
MGColors <- c("magenta4", "skyblue1", "darkseagreen", "darkslateblue", "thistle4", "salmon4")
genesMG.GO.sig <- filter(genesMG.GO, moduleColor%in%MGColors)
head(genesMG.GO.sig)
##              gene_id   moduleColor     X212     X215
## 1 adi2mcaRNA10301_R1 darkslateblue 9.146095 9.213312
## 2 adi2mcaRNA10301_R2 darkslateblue 9.160013 9.179424
## 3 adi2mcaRNA10726_R3 darkslateblue 9.579740 9.511527
## 4 adi2mcaRNA10825_R8 darkslateblue 9.179690 9.544098
## 5 adi2mcaRNA10885_R4 darkslateblue 9.050707 9.141201
## 6 adi2mcaRNA10907_R0 darkslateblue 9.027055 9.119843
dim(genesMG.GO.sig)
## [1] 1771    4
#Late Gastrula
genesLG.GO <- genes.GO[,c(1, 18:19)]
genesLG.GO$gene_id <- as.factor(genesLG.GO$gene_id) #Make factor for merge
genesLG.GO <- merge(geneColor, genesLG.GO) #append module info LGion
head(genesLG.GO)
##              gene_id moduleColor      X218      X221
## 1 adi2mcaRNA10013_R0   honeydew1 11.524604 11.469398
## 2 adi2mcaRNA10017_R1   honeydew1  9.458826  9.292660
## 3 adi2mcaRNA10018_R0        blue  8.873991  9.012454
## 4 adi2mcaRNA10022_R0        cyan  9.414579  9.470327
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3 10.588162 10.566363
LGColors <- c("mediumpurple1", "sienna3", "thistle4", "salmon4")
genesLG.GO.sig <- filter(genesLG.GO, moduleColor%in%LGColors)
head(genesLG.GO.sig)
##              gene_id   moduleColor      X218      X221
## 1 adi2mcaRNA10043_R1       sienna3 10.588162 10.566363
## 2 adi2mcaRNA10804_R5       sienna3  9.132869  9.264579
## 3 adi2mcaRNA10928_R8       sienna3  9.074625  9.170552
## 4 adi2mcaRNA11115_R3 mediumpurple1  9.045840  9.164056
## 5 adi2mcaRNA11304_R1       sienna3  9.321190  9.421293
## 6 adi2mcaRNA11304_R2       sienna3  9.093739  9.259695
dim(genesLG.GO.sig)
## [1] 1237    4
#Planula
genesPln.GO <- genes.GO[,c(1, 20:22)]
genesPln.GO$gene_id <- as.factor(genesPln.GO$gene_id) #Make factor for merge
genesPln.GO <- merge(geneColor, genesPln.GO) #append module info Plnion
head(genesPln.GO)
##              gene_id moduleColor      X359      X361      X375
## 1 adi2mcaRNA10013_R0   honeydew1 11.856368 11.760712 11.971630
## 2 adi2mcaRNA10017_R1   honeydew1  9.344819  9.354966  9.290794
## 3 adi2mcaRNA10018_R0        blue  9.270761  9.259076  9.271063
## 4 adi2mcaRNA10022_R0        cyan 10.211142 10.159268 10.249013
## 5 adi2mcaRNA10025_R0        blue  9.513902  9.358264  9.601831
## 6 adi2mcaRNA10043_R1     sienna3 10.529897 10.637113 10.634318
PlnColors <- c("blue", "salmon", "sienna3")
genesPln.GO.sig <- filter(genesPln.GO, moduleColor%in%PlnColors)
head(genesPln.GO.sig)
##              gene_id moduleColor      X359      X361      X375
## 1 adi2mcaRNA10018_R0        blue  9.270761  9.259076  9.271063
## 2 adi2mcaRNA10025_R0        blue  9.513902  9.358264  9.601831
## 3 adi2mcaRNA10043_R1     sienna3 10.529897 10.637113 10.634318
## 4 adi2mcaRNA10073_R0        blue 12.558500 12.537344 12.715980
## 5 adi2mcaRNA10211_R5        blue  9.103547  9.062726  9.144205
## 6  adi2mcaRNA1022_R0        blue 10.352927 10.341156 10.639776
dim(genesPln.GO.sig)
## [1] 13122     5
#Adult
genesAdult.GO <- genes.GO[,c(1, 23:25)]
genesAdult.GO$gene_id <- as.factor(genesAdult.GO$gene_id) #Make factor for merge
genesAdult.GO <- merge(geneColor, genesAdult.GO) #append module info Adult
head(genesAdult.GO)
##              gene_id moduleColor     X1101     X1548     X1628
## 1 adi2mcaRNA10013_R0   honeydew1 11.856902 11.747998 11.391425
## 2 adi2mcaRNA10017_R1   honeydew1  9.341403  9.412548  9.311949
## 3 adi2mcaRNA10018_R0        blue  8.873991  9.104723  9.124311
## 4 adi2mcaRNA10022_R0        cyan 10.800664 11.260274 11.370141
## 5 adi2mcaRNA10025_R0        blue  9.521989  9.820412  9.592289
## 6 adi2mcaRNA10043_R1     sienna3  9.039879  8.873991  9.133735
AdultColors <- c("antiquewhite4", "thistle", "navajowhite1", "blue", "cyan", "blueviolet", "ivory")
genesAdult.GO.sig <- filter(genesAdult.GO, moduleColor%in%AdultColors)
head(genesAdult.GO.sig)
##              gene_id moduleColor     X1101     X1548     X1628
## 1 adi2mcaRNA10018_R0        blue  8.873991  9.104723  9.124311
## 2 adi2mcaRNA10022_R0        cyan 10.800664 11.260274 11.370141
## 3 adi2mcaRNA10025_R0        blue  9.521989  9.820412  9.592289
## 4 adi2mcaRNA10072_R0        cyan 10.116842  9.905310  9.665687
## 5 adi2mcaRNA10073_R0        blue 12.489762 12.218500 11.913712
## 6 adi2mcaRNA10172_R4        cyan  9.348586  8.873991  8.873991
dim(genesAdult.GO.sig)
## [1] 16226     5

Find the length of each gene using the stringtie_merged.gtf as a map

map <- read.csv(file="1-QC-Align-Assemble/Output/stringtie_merged.gtf", header=FALSE, sep="\t", skip=2) #load sample info
map <- subset(map, V3=="transcript")
map <- map[,c(1,4,5,9)]
map <- separate(map, V9, into = c("gene_id", "transcript_id", "gene_name"), sep=";")
## Warning: Expected 3 pieces. Additional pieces discarded in 63227 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
map$gene_id <- gsub("gene_id ","",map$gene_id) #remove extra characters
map$gene_id <- gsub(" ","",map$gene_id) #remove extra characters
map$transcript_id <- gsub("transcript_id ","",map$transcript_id) #remove extra characters
map$transcript_id <- gsub(" ","",map$transcript_id) #remove extra characters
map$gene_name <- gsub("ref_gene_id ","",map$gene_name) #remove extra characters
# map$gene_name <- gsub("gene_name ","",map$gene_name) #remove extra characters
map$gene_name <- gsub(" ","",map$gene_name) #remove extra characters
# map$gene_name <- gsub("xlocXLOC_[0-9][0-9][0-9][0-9][0-9][0-9]", "unknown", map$gene_name)
colnames(map) <- c("scaffold", "start", "stop", "gene_name", "transcript_id", "gene_id")
dim(map)
## [1] 63227     6
head(map)
##    scaffold  start   stop gene_name transcript_id gene_id
## 1         1  37447  52266   MSTRG.1     g21534.t1  g21534
## 7         1  58322  62557   MSTRG.2     g21535.t1  g21535
## 10        1  64466  84798   MSTRG.3     g21536.t1  g21536
## 22        1  88347  97184   MSTRG.4     g21537.t1  g21537
## 30        1 100215 109729   MSTRG.5     g21538.t1  g21538
## 36        1 109867 128510   MSTRG.6     g21539.t1  g21539
map <- filter(map, gene_id %in% genes.GO$gene_id) #Filter for those genes in our set of interest
dim(map) #Should be 32772
## [1] 32772     6
#Calculate gene length
map <- map %>% mutate(gene_length=(map$stop-map$start))
map <- select(map, gene_id, gene_length)

Build a data frame that links the gene IDs, modules, and counts of expressed maternal genes (poverA = 0.09,5) and the gene lengths.

GOref <- merge(genes.GO, map, by.x="gene_id")
head(GOref)
##              gene_id      X119      X120      X121      X127      X132
## 1 adi2mcaRNA10013_R0 13.856733 13.718850 14.020625 13.874230 13.986782
## 2 adi2mcaRNA10017_R1 10.130987 10.286848 10.131647  9.909535  9.869312
## 3 adi2mcaRNA10018_R0  8.873991  8.873991  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0 10.323999 10.436964 10.185860 10.305153 10.566842
## 5 adi2mcaRNA10025_R0  8.873991  8.873991  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1  8.873991  8.873991  8.873991  8.873991  8.873991
##        X153      X154      X159      X162      X163      X167      X179
## 1 14.038510 14.281777 14.407036 13.730070 13.862217 13.757265 12.225041
## 2  9.780544  9.825871  9.793535 10.025731  9.985553  9.950188  9.759188
## 3  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991  9.082857
## 4 10.291471 10.108466 10.053651  9.690496  9.691840  9.711188  9.777535
## 5  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991
## 6  8.873991  8.873991  8.873991  9.109010  8.873991  8.873991  8.873991
##        X180      X184      X212      X215      X218      X221      X359
## 1 11.888140 12.070319 11.264607 11.262483 11.524604 11.469398 11.856368
## 2  9.807519  9.833320  9.527062  9.650037  9.458826  9.292660  9.344819
## 3  8.873991  9.028027  8.962390  8.873991  8.873991  9.012454  9.270761
## 4  9.493523  9.783506  9.081160  9.329598  9.414579  9.470327 10.211142
## 5  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991  9.513902
## 6  8.873991  8.873991  9.598113  9.426005 10.588162 10.566363 10.529897
##        X361      X375     X1101     X1548     X1628 gene_length
## 1 11.760712 11.971630 11.856902 11.747998 11.391425       26063
## 2  9.354966  9.290794  9.341403  9.412548  9.311949         880
## 3  9.259076  9.271063  8.873991  9.104723  9.124311        5012
## 4 10.159268 10.249013 10.800664 11.260274 11.370141       57034
## 5  9.358264  9.601831  9.521989  9.820412  9.592289       18060
## 6 10.637113 10.634318  9.039879  8.873991  9.133735        4816
dim(GOref) #Should have 32772
## [1] 32772    26

GOseq requires a vector of all genes and all differentially expressed genes.

## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  1                  1                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## NULL
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  1                  1                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## NULL
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## NULL
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  1                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  1                  1
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  0                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  0                  1
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  1                  0 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  1                  1
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816
## adi2mcaRNA10013_R0 adi2mcaRNA10017_R1 adi2mcaRNA10018_R0 adi2mcaRNA10022_R0 
##                  0                  0                  1                  1 
## adi2mcaRNA10025_R0 adi2mcaRNA10043_R1 
##                  1                  0
## [1] "adi2mcaRNA10013_R0" "adi2mcaRNA10017_R1" "adi2mcaRNA10018_R0"
## [4] "adi2mcaRNA10022_R0" "adi2mcaRNA10025_R0" "adi2mcaRNA10043_R1"
## [1] 26063   880  5012 57034 18060  4816

Prepare GO term dataframe

GO.annot <- select(geneInfo, gene_id, Annotation.GO.ID)
splitted <- strsplit(as.character(GO.annot$Annotation.GO.ID), ";") #split into multiple GO ids
GO.terms <- data.frame(v1 = rep.int(GO.annot$gene_id, sapply(splitted, length)), v2 = unlist(splitted)) #list all genes with each of their GO terms in a single row
colnames(GO.terms) <- c("gene_id", "GO.ID")
head(GO.terms)
##   gene_id       GO.ID
## 1  g45338  GO:0046872
## 2  g52470  GO:0005085
## 3  g47993  GO:0030705
## 4  g29630  GO:0035556
## 5  g47496  GO:0004672
## 6  g47496  GO:0005515
tail(GO.terms)
##                  gene_id      GO.ID
## 44399             g20698 GO:0006355
## 44400             g20698 GO:0021520
## 44401             g20698 GO:0043565
## 44402 adi2mcaRNA16131_R0 GO:0003953
## 44403 adi2mcaRNA16131_R0 GO:0003953
## 44404             g41986 GO:0005515
GO.terms$GO.ID<- as.character(GO.terms$GO.ID)
GO.terms$GO.ID <- replace_na(GO.terms$GO.ID, "unknown")
GO.terms$GO.ID <- as.factor(GO.terms$GO.ID)
GO.terms$gene_id <- as.factor(GO.terms$gene_id)
GO.terms$GO.ID <- gsub(" ", "", GO.terms$GO.ID)
GO.terms <- unique(GO.terms)

dim(GO.terms)
## [1] 41816     2
head(GO.terms, 10)
##    gene_id      GO.ID
## 1   g45338 GO:0046872
## 2   g52470 GO:0005085
## 3   g47993 GO:0030705
## 4   g29630 GO:0035556
## 5   g47496 GO:0004672
## 6   g47496 GO:0005515
## 7   g47496 GO:0005524
## 8   g47496 GO:0006468
## 9   g17830    unknown
## 10  g64426 GO:0006811
tail(GO.terms, 10)
##                  gene_id      GO.ID
## 44393             g41833 GO:0005515
## 44394             g63674 GO:0016020
## 44395             g20698 GO:0000981
## 44396             g20698 GO:0003677
## 44397             g20698 GO:0006355
## 44398             g20698 GO:0005634
## 44400             g20698 GO:0021520
## 44401             g20698 GO:0043565
## 44402 adi2mcaRNA16131_R0 GO:0003953
## 44404             g41986 GO:0005515

Find enriched GO terms, “selection-unbiased testing for category enrichment amongst significantly expressed genes for RNA-seq data”

GOwall.Mat <- goseq(pwf.Mat, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
dim(GOwall.Mat)
## [1] 2457    7
head(GOwall.Mat)
##        category over_represented_pvalue under_represented_pvalue numDEInCat
## 1031 GO:0007034            1.421657e-06                0.9999999         10
## 707  GO:0005829            2.074599e-06                0.9999998         13
## 1786 GO:0031966            1.035405e-05                1.0000000          6
## 62   GO:0000387            1.297867e-05                0.9999992          8
## 209  GO:0003884            2.563130e-05                1.0000000          6
## 2105 GO:0046416            2.563130e-05                1.0000000          6
##      numInCat                           term ontology
## 1031       14             vacuolar transport       BP
## 707        22                        cytosol       CC
## 1786        6         mitochondrial membrane       CC
## 62         12    spliceosomal snRNP assembly       BP
## 209         6  D-amino-acid oxidase activity       MF
## 2105        6 D-amino acid metabolic process       BP
GOwall.UE <- goseq(pwf.UE, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.FE <- goseq(pwf.FE, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.ZGA1 <- goseq(pwf.ZGA1, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.Clvg <- goseq(pwf.Clvg, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.PC <- goseq(pwf.PC, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.EG <- goseq(pwf.EG, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.ZGA2 <- goseq(pwf.ZGA2, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.MG <- goseq(pwf.MG, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.LG <- goseq(pwf.LG, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.Pln <- goseq(pwf.Pln, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)
GOwall.Adult <- goseq(pwf.Adult, GOref$gene_id, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)

Find only enriched GO terms that are statistically significant at cutoff

Mat.GO.05<-GOwall.Mat$category[GOwall.Mat$over_represented_pvalue<.05]
Mat.GO.05<-data.frame(Mat.GO.05)
colnames(Mat.GO.05) <- c("category")
Mat.GO.05 <- merge(Mat.GO.05, GOwall.Mat, by="category")
Mat.GO.05 <- Mat.GO.05[order(Mat.GO.05$ontology, Mat.GO.05$over_represented_pvalue, -Mat.GO.05$numDEInCat),]
Mat.GO.05$term <- as.factor(Mat.GO.05$term)
dim(Mat.GO.05) #Number of sig GO terms
## [1] 216   7
nrow(filter(Mat.GO.05, ontology=="BP")) #number sig BP terms
## [1] 92
UE.GO.05<-GOwall.UE$category[GOwall.UE$over_represented_pvalue<.05]
UE.GO.05<-data.frame(UE.GO.05)
colnames(UE.GO.05) <- c("category")
UE.GO.05 <- merge(UE.GO.05, GOwall.UE, by="category")
UE.GO.05 <- UE.GO.05[order(UE.GO.05$ontology, UE.GO.05$over_represented_pvalue, -UE.GO.05$numDEInCat),]
UE.GO.05$term <- as.factor(UE.GO.05$term)
dim(UE.GO.05) #Number of sig GO terms
## [1] 202   7
nrow(filter(UE.GO.05, ontology=="BP")) #number sig BP terms
## [1] 83
FE.GO.05<-GOwall.FE$category[GOwall.FE$over_represented_pvalue<.05]
FE.GO.05<-data.frame(FE.GO.05)
colnames(FE.GO.05) <- c("category")
FE.GO.05 <- merge(FE.GO.05, GOwall.FE, by="category")
FE.GO.05 <- FE.GO.05[order(FE.GO.05$ontology, FE.GO.05$over_represented_pvalue, -FE.GO.05$numDEInCat),]
FE.GO.05$term <- as.factor(FE.GO.05$term)
dim(FE.GO.05) #Number of sig GO terms
## [1] 153   7
nrow(filter(FE.GO.05, ontology=="BP")) #number sig BP terms
## [1] 68
ZGA1.GO.05<-GOwall.ZGA1$category[GOwall.ZGA1$over_represented_pvalue<.05]
ZGA1.GO.05<-data.frame(ZGA1.GO.05)
colnames(ZGA1.GO.05) <- c("category")
ZGA1.GO.05 <- merge(ZGA1.GO.05, GOwall.ZGA1, by="category")
ZGA1.GO.05 <- ZGA1.GO.05[order(ZGA1.GO.05$ontology, ZGA1.GO.05$over_represented_pvalue, -ZGA1.GO.05$numDEInCat),]
ZGA1.GO.05$term <- as.factor(ZGA1.GO.05$term)
dim(ZGA1.GO.05) #Number of sig GO terms
## [1] 136   7
nrow(filter(ZGA1.GO.05, ontology=="BP")) #number sig BP terms
## [1] 55
Clvg.GO.05<-GOwall.Clvg$category[GOwall.Clvg$over_represented_pvalue<.05]
Clvg.GO.05<-data.frame(Clvg.GO.05)
colnames(Clvg.GO.05) <- c("category")
Clvg.GO.05 <- merge(Clvg.GO.05, GOwall.Clvg, by="category")
Clvg.GO.05 <- Clvg.GO.05[order(Clvg.GO.05$ontology, Clvg.GO.05$over_represented_pvalue, -Clvg.GO.05$numDEInCat),]
Clvg.GO.05$term <- as.factor(Clvg.GO.05$term)
dim(Clvg.GO.05) #Number of sig GO terms
## [1] 106   7
nrow(filter(Clvg.GO.05, ontology=="BP")) #number sig BP terms
## [1] 45
PC.GO.05<-GOwall.PC$category[GOwall.PC$over_represented_pvalue<.05]
PC.GO.05<-data.frame(PC.GO.05)
colnames(PC.GO.05) <- c("category")
PC.GO.05 <- merge(PC.GO.05, GOwall.PC, by="category")
PC.GO.05 <- PC.GO.05[order(PC.GO.05$ontology, PC.GO.05$over_represented_pvalue, -PC.GO.05$numDEInCat),]
PC.GO.05$term <- as.factor(PC.GO.05$term)
dim(PC.GO.05) #Number of sig GO terms
## [1] 114   7
nrow(filter(PC.GO.05, ontology=="BP")) #number sig BP terms
## [1] 52
EG.GO.05<-GOwall.EG$category[GOwall.EG$over_represented_pvalue<.05]
EG.GO.05<-data.frame(EG.GO.05)
colnames(EG.GO.05) <- c("category")
EG.GO.05 <- merge(EG.GO.05, GOwall.EG, by="category")
EG.GO.05 <- EG.GO.05[order(EG.GO.05$ontology, EG.GO.05$over_represented_pvalue, -EG.GO.05$numDEInCat),]
EG.GO.05$term <- as.factor(EG.GO.05$term)
dim(EG.GO.05) #Number of sig GO terms
## [1] 113   7
nrow(filter(EG.GO.05, ontology=="BP")) #number sig BP terms
## [1] 48
ZGA2.GO.05<-GOwall.ZGA2$category[GOwall.ZGA2$over_represented_pvalue<.05]
ZGA2.GO.05<-data.frame(ZGA2.GO.05)
colnames(ZGA2.GO.05) <- c("category")
ZGA2.GO.05 <- merge(ZGA2.GO.05, GOwall.ZGA2, by="category")
ZGA2.GO.05 <- ZGA2.GO.05[order(ZGA2.GO.05$ontology, ZGA2.GO.05$over_represented_pvalue, -ZGA2.GO.05$numDEInCat),]
ZGA2.GO.05$term <- as.factor(ZGA2.GO.05$term)
dim(ZGA2.GO.05) #Number of sig GO terms
## [1] 85  7
nrow(filter(ZGA2.GO.05, ontology=="BP")) #number sig BP terms
## [1] 22
MG.GO.05<-GOwall.MG$category[GOwall.MG$over_represented_pvalue<.05]
MG.GO.05<-data.frame(MG.GO.05)
colnames(MG.GO.05) <- c("category")
MG.GO.05 <- merge(MG.GO.05, GOwall.MG, by="category")
MG.GO.05 <- MG.GO.05[order(MG.GO.05$ontology, MG.GO.05$over_represented_pvalue, -MG.GO.05$numDEInCat),]
MG.GO.05$term <- as.factor(MG.GO.05$term)
dim(MG.GO.05) #Number of sig GO terms
## [1] 140   7
nrow(filter(MG.GO.05, ontology=="BP")) #number sig BP terms
## [1] 65
LG.GO.05<-GOwall.LG$category[GOwall.LG$over_represented_pvalue<.05]
LG.GO.05<-data.frame(LG.GO.05)
colnames(LG.GO.05) <- c("category")
LG.GO.05 <- merge(LG.GO.05, GOwall.LG, by="category")
LG.GO.05 <- LG.GO.05[order(LG.GO.05$ontology, LG.GO.05$over_represented_pvalue, -LG.GO.05$numDEInCat),]
LG.GO.05$term <- as.factor(LG.GO.05$term)
dim(LG.GO.05) #Number of sig GO terms
## [1] 85  7
nrow(filter(LG.GO.05, ontology=="BP")) #number sig BP terms
## [1] 34
Pln.GO.05<-GOwall.Pln$category[GOwall.Pln$over_represented_pvalue<.05]
Pln.GO.05<-data.frame(Pln.GO.05)
colnames(Pln.GO.05) <- c("category")
Pln.GO.05 <- merge(Pln.GO.05, GOwall.Pln, by="category")
Pln.GO.05 <- Pln.GO.05[order(Pln.GO.05$ontology, Pln.GO.05$over_represented_pvalue, -Pln.GO.05$numDEInCat),]
Pln.GO.05$term <- as.factor(Pln.GO.05$term)
dim(Pln.GO.05) #Number of sig GO terms
## [1] 87  7
nrow(filter(Pln.GO.05, ontology=="BP")) #number sig BP terms
## [1] 24
Adult.GO.05<-GOwall.Adult$category[GOwall.Adult$over_represented_pvalue<.05]
Adult.GO.05<-data.frame(Adult.GO.05)
colnames(Adult.GO.05) <- c("category")
Adult.GO.05 <- merge(Adult.GO.05, GOwall.Adult, by="category")
Adult.GO.05 <- Adult.GO.05[order(Adult.GO.05$ontology, Adult.GO.05$over_represented_pvalue, -Adult.GO.05$numDEInCat),]
Adult.GO.05$term <- as.factor(Adult.GO.05$term)
dim(Adult.GO.05) #Number of sig GO terms
## [1] 93  7
nrow(filter(Adult.GO.05, ontology=="BP")) #number sig BP terms
## [1] 31

Correct any un-annotated terms/ontologies

NAs.ontology <- Mat.GO.05 %>% subset(is.na(term))
print(NAs.ontology)
## [1] category                 over_represented_pvalue  under_represented_pvalue
## [4] numDEInCat               numInCat                 term                    
## [7] ontology                
## <0 rows> (or 0-length row.names)
NAs.ontology <- ZGA1.GO.05 %>% subset(is.na(term))
print(NAs.ontology)
## [1] category                 over_represented_pvalue  under_represented_pvalue
## [4] numDEInCat               numInCat                 term                    
## [7] ontology                
## <0 rows> (or 0-length row.names)
NAs.ontology <- ZGA2.GO.05 %>% subset(is.na(term))
print(NAs.ontology)
##    category over_represented_pvalue under_represented_pvalue numDEInCat
## 85  unknown             0.008588414                0.9924088       1041
##    numInCat term ontology
## 85     2105 <NA>     <NA>
NAs.ontology <- Adult.GO.05 %>% subset(is.na(term))
print(NAs.ontology)
##    category over_represented_pvalue under_represented_pvalue numDEInCat
## 93  unknown            3.172716e-19                        1       1234
##    numInCat term ontology
## 93     2105 <NA>     <NA>

There were no un-annotated functions except genes with unknown functions

Save significant terms

write.csv(Mat.GO.05, file = "2a-WGCNA/Output/GO.05.Mat.csv", row.names = FALSE)
write.csv(UE.GO.05, file = "2a-WGCNA/Output/GO.05.UE.csv", row.names = FALSE)
write.csv(FE.GO.05, file = "2a-WGCNA/Output/GO.05.FE.csv", row.names = FALSE)
write.csv(ZGA1.GO.05, file = "2a-WGCNA/Output/GO.05.ZGA1.csv", row.names = FALSE)
write.csv(Clvg.GO.05, file = "2a-WGCNA/Output/GO.05.Clvg.csv", row.names = FALSE)
write.csv(PC.GO.05, file = "2a-WGCNA/Output/GO.05.PC.csv", row.names = FALSE)
write.csv(EG.GO.05, file = "2a-WGCNA/Output/GO.05.EG.csv", row.names = FALSE)
write.csv(ZGA2.GO.05, file = "2a-WGCNA/Output/GO.05.ZGA2.csv", row.names = FALSE)
write.csv(MG.GO.05, file = "2a-WGCNA/Output/GO.05.MG.csv", row.names = FALSE)
write.csv(LG.GO.05, file = "2a-WGCNA/Output/GO.05.LG.csv", row.names = FALSE)
write.csv(Pln.GO.05, file = "2a-WGCNA/Output/GO.05.Pln.csv", row.names = FALSE)
write.csv(Adult.GO.05, file = "2a-WGCNA/Output/GO.05.Adult.csv", row.names = FALSE)

KEGG enrichment of maternal modules

KEGG Pathway Analysis of Maternal Transcriptome

Prepare a KEGG vector of differentially expressed

# head(KEGGinfo)
# 
# KEGGinfo <- select(geneInfo, gene_id, KEGG)
# KEGGinfo <- filter(KEGGinfo, KEGG != "0") #Keep on Kegg IDs in Kegg column
# KEGGinfo <- unique(KEGGinfo)
# KEGGinfo$KEGG <- as.factor(KEGGinfo$KEGG) #Every column must be a factor
# KEGGinfo$gene_id <- as.factor(KEGGinfo$gene_id) #Every column must be a factor
# dim(KEGGinfo)
# head(KEGGinfo)
# 
# 
# 
# KEGG_input_maternal <- filter(KEGGinfo, gene_id%in%genesMat.GO.sig$gene_id) #Filter for gene IDs in the maternal set
# head(KEGG_input_maternal)
# dim(KEGG_input_maternal)
# write.table(KEGG_input_maternal, "Output/RNAseq/maternal.05.KEGG_mapper_input.txt", quote=FALSE,col.names=FALSE,row.names=FALSE,sep="\t") #TO BE USED FOR KEGG Mapper https://www.genome.jp/kegg/mapper.html.

Perform enrichment analysis

# KEGG.05.maternal <- enrichKEGG(KEGG_input_maternal$KEGG, organism = "ko", keyType = "kegg", pvalueCutoff = 0.05) #run enrichment with Kegg sig < 0.05
# KEGG.05.maternal <- KEGG.05.maternal[order(KEGG.05.maternal$p.adjust, -KEGG.05.maternal$Count),] #order by p-value, then count
# head(KEGG.05.maternal) #analysis of DE genes
# dim(KEGG.05.maternal)
# write.csv(KEGG.05.maternal, file = "Output/RNAseq/KEGG.05.maternal.csv")

Obtain signicant GO terms for each module

Make a list of all module colors

moduleColor_list <- unique(geneColor$moduleColor)
moduleColor_list
##  [1] "antiquewhite2"  "antiquewhite4"  "blue"           "blue2"         
##  [5] "blue4"          "blueviolet"     "brown2"         "coral"         
##  [9] "coral1"         "cyan"           "darkmagenta"    "darkseagreen4" 
## [13] "darkslateblue"  "grey"           "honeydew1"      "indianred3"    
## [17] "ivory"          "lavenderblush2" "lightslateblue" "lightsteelblue"
## [21] "magenta4"       "mediumpurple1"  "mediumpurple3"  "midnightblue"  
## [25] "navajowhite1"   "plum3"          "plum4"          "salmon"        
## [29] "salmon4"        "sienna3"        "skyblue1"       "thistle"       
## [33] "thistle4"       "violet"

Obtain module color of all expressed genes (poverA = 0.85,5).

genes.GO$gene_id <- as.factor(genes.GO$gene_id) #Make factor for merge
genes.GO <- merge(geneColor, genes.GO) #append module information
genes.GO$moduleColor <- as.factor(genes.GO$moduleColor) #Make factor for filter
head(genes.GO)
##              gene_id moduleColor      X119      X120      X121      X127
## 1 adi2mcaRNA10013_R0   honeydew1 13.856733 13.718850 14.020625 13.874230
## 2 adi2mcaRNA10017_R1   honeydew1 10.130987 10.286848 10.131647  9.909535
## 3 adi2mcaRNA10018_R0        blue  8.873991  8.873991  8.873991  8.873991
## 4 adi2mcaRNA10022_R0        cyan 10.323999 10.436964 10.185860 10.305153
## 5 adi2mcaRNA10025_R0        blue  8.873991  8.873991  8.873991  8.873991
## 6 adi2mcaRNA10043_R1     sienna3  8.873991  8.873991  8.873991  8.873991
##        X132      X153      X154      X159      X162      X163      X167
## 1 13.986782 14.038510 14.281777 14.407036 13.730070 13.862217 13.757265
## 2  9.869312  9.780544  9.825871  9.793535 10.025731  9.985553  9.950188
## 3  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991
## 4 10.566842 10.291471 10.108466 10.053651  9.690496  9.691840  9.711188
## 5  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991
## 6  8.873991  8.873991  8.873991  8.873991  9.109010  8.873991  8.873991
##        X179      X180      X184      X212      X215      X218      X221
## 1 12.225041 11.888140 12.070319 11.264607 11.262483 11.524604 11.469398
## 2  9.759188  9.807519  9.833320  9.527062  9.650037  9.458826  9.292660
## 3  9.082857  8.873991  9.028027  8.962390  8.873991  8.873991  9.012454
## 4  9.777535  9.493523  9.783506  9.081160  9.329598  9.414579  9.470327
## 5  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991  8.873991
## 6  8.873991  8.873991  8.873991  9.598113  9.426005 10.588162 10.566363
##        X359      X361      X375     X1101     X1548     X1628
## 1 11.856368 11.760712 11.971630 11.856902 11.747998 11.391425
## 2  9.344819  9.354966  9.290794  9.341403  9.412548  9.311949
## 3  9.270761  9.259076  9.271063  8.873991  9.104723  9.124311
## 4 10.211142 10.159268 10.249013 10.800664 11.260274 11.370141
## 5  9.513902  9.358264  9.601831  9.521989  9.820412  9.592289
## 6 10.529897 10.637113 10.634318  9.039879  8.873991  9.133735
dim(genes.GO)
## [1] 32772    26

Obtain list of significantly-expressed genes for each module

antiquewhite2.sig <- filter(genes.GO, moduleColor=="antiquewhite2")
antiquewhite4.sig <- filter(genes.GO, moduleColor=="antiquewhite4")
blue.sig <- filter(genes.GO, moduleColor=="blue")
blue2.sig <- filter(genes.GO, moduleColor=="blue2")
blue4.sig <- filter(genes.GO, moduleColor=="blue4")
blueviolet.sig <- filter(genes.GO, moduleColor=="blueviolet")
brown2.sig <- filter(genes.GO, moduleColor=="brown2")
coral.sig <- filter(genes.GO, moduleColor=="coral")
coral1.sig <- filter(genes.GO, moduleColor=="coral1")
cyan.sig <- filter(genes.GO, moduleColor=="cyan")
darkmagenta.sig <- filter(genes.GO, moduleColor=="darkmagenta")
darkseagreen4.sig <- filter(genes.GO, moduleColor=="darkseagreen4")
darkslateblue.sig <- filter(genes.GO, moduleColor=="darkslateblue")
grey.sig <- filter(genes.GO, moduleColor=="grey")
honeydew1.sig <- filter(genes.GO, moduleColor=="honeydew1")
indianred3.sig <- filter(genes.GO, moduleColor=="indianred3")
ivory.sig <- filter(genes.GO, moduleColor=="ivory")
lavenderblush2.sig <- filter(genes.GO, moduleColor=="lavenderblush2")
lightslateblue.sig <- filter(genes.GO, moduleColor=="lightslateblue")
lightsteelblue.sig <- filter(genes.GO, moduleColor=="lightsteelblue")
magenta4.sig <- filter(genes.GO, moduleColor=="magenta4")
mediumpurple1.sig <- filter(genes.GO, moduleColor=="mediumpurple1")
mediumpurple3.sig <- filter(genes.GO, moduleColor=="mediumpurple3")
midnightblue.sig <- filter(genes.GO, moduleColor=="midnightblue")
navajowhite1.sig <- filter(genes.GO, moduleColor=="navajowhite1")
plum3.sig <- filter(genes.GO, moduleColor=="plum3")
plum4.sig <- filter(genes.GO, moduleColor=="plum4")
salmon.sig <- filter(genes.GO, moduleColor=="salmon")
salmon4.sig <- filter(genes.GO, moduleColor=="salmon4")
sienna3.sig <- filter(genes.GO, moduleColor=="sienna3")
skyblue1.sig <- filter(genes.GO, moduleColor=="skyblue1")
thistle.sig <- filter(genes.GO, moduleColor=="thistle")
thistle4.sig <- filter(genes.GO, moduleColor=="thistle4")
violet.sig <- filter(genes.GO, moduleColor=="violet")

GOseq requires a vector of all genes and all differentially expressed genes.

antiquewhite2.vector <- as.vector(antiquewhite2.sig$gene_id)
antiquewhite2.vector=as.integer(GOref$gene_id%in%antiquewhite2.vector)
names(antiquewhite2.vector)=GOref$gene_id

antiquewhite4.vector <- as.vector(antiquewhite4.sig$gene_id)
antiquewhite4.vector=as.integer(GOref$gene_id%in%antiquewhite4.vector)
names(antiquewhite4.vector)=GOref$gene_id

blue.vector <- as.vector(blue.sig$gene_id)
blue.vector=as.integer(GOref$gene_id%in%blue.vector)
names(blue.vector)=GOref$gene_id

blue2.vector <- as.vector(blue2.sig$gene_id)
blue2.vector=as.integer(GOref$gene_id%in%blue2.vector)
names(blue2.vector)=GOref$gene_id

blue4.vector <- as.vector(blue4.sig$gene_id)
blue4.vector=as.integer(GOref$gene_id%in%blue4.vector)
names(blue4.vector)=GOref$gene_id

blueviolet.vector <- as.vector(blueviolet.sig$gene_id)
blueviolet.vector=as.integer(GOref$gene_id%in%blueviolet.vector)
names(blueviolet.vector)=GOref$gene_id

brown2.vector <- as.vector(brown2.sig$gene_id)
brown2.vector=as.integer(GOref$gene_id%in%brown2.vector)
names(brown2.vector)=GOref$gene_id

coral.vector <- as.vector(coral.sig$gene_id)
coral.vector=as.integer(GOref$gene_id%in%coral.vector)
names(coral.vector)=GOref$gene_id

coral1.vector <- as.vector(coral1.sig$gene_id)
coral1.vector=as.integer(GOref$gene_id%in%coral1.vector)
names(coral1.vector)=GOref$gene_id

cyan.vector <- as.vector(cyan.sig$gene_id)
cyan.vector=as.integer(GOref$gene_id%in%cyan.vector)
names(cyan.vector)=GOref$gene_id

darkmagenta.vector <- as.vector(darkmagenta.sig$gene_id)
darkmagenta.vector=as.integer(GOref$gene_id%in%darkmagenta.vector)
names(darkmagenta.vector)=GOref$gene_id

darkseagreen4.vector <- as.vector(darkseagreen4.sig$gene_id)
darkseagreen4.vector=as.integer(GOref$gene_id%in%darkseagreen4.vector)
names(darkseagreen4.vector)=GOref$gene_id

darkslateblue.vector <- as.vector(darkslateblue.sig$gene_id)
darkslateblue.vector=as.integer(GOref$gene_id%in%darkslateblue.vector)
names(darkslateblue.vector)=GOref$gene_id

grey.vector <- as.vector(grey.sig$gene_id)
grey.vector=as.integer(GOref$gene_id%in%grey.vector)
names(grey.vector)=GOref$gene_id

honeydew1.vector <- as.vector(honeydew1.sig$gene_id)
honeydew1.vector=as.integer(GOref$gene_id%in%honeydew1.vector)
names(honeydew1.vector)=GOref$gene_id

indianred3.vector <- as.vector(indianred3.sig$gene_id)
indianred3.vector=as.integer(GOref$gene_id%in%indianred3.vector)
names(indianred3.vector)=GOref$gene_id

ivory.vector <- as.vector(ivory.sig$gene_id)
ivory.vector=as.integer(GOref$gene_id%in%ivory.vector)
names(ivory.vector)=GOref$gene_id

lavenderblush2.vector <- as.vector(lavenderblush2.sig$gene_id)
lavenderblush2.vector=as.integer(GOref$gene_id%in%lavenderblush2.vector)
names(lavenderblush2.vector)=GOref$gene_id

lightslateblue.vector <- as.vector(lightslateblue.sig$gene_id)
lightslateblue.vector=as.integer(GOref$gene_id%in%lightslateblue.vector)
names(lightslateblue.vector)=GOref$gene_id

lightsteelblue.vector <- as.vector(lightsteelblue.sig$gene_id)
lightsteelblue.vector=as.integer(GOref$gene_id%in%lightsteelblue.vector)
names(lightsteelblue.vector)=GOref$gene_id

magenta4.vector <- as.vector(magenta4.sig$gene_id)
magenta4.vector=as.integer(GOref$gene_id%in%magenta4.vector)
names(magenta4.vector)=GOref$gene_id

mediumpurple1.vector <- as.vector(mediumpurple1.sig$gene_id)
mediumpurple1.vector=as.integer(GOref$gene_id%in%mediumpurple1.vector)
names(mediumpurple1.vector)=GOref$gene_id

mediumpurple3.vector <- as.vector(mediumpurple3.sig$gene_id)
mediumpurple3.vector=as.integer(GOref$gene_id%in%mediumpurple3.vector)
names(mediumpurple3.vector)=GOref$gene_id

midnightblue.vector <- as.vector(midnightblue.sig$gene_id)
midnightblue.vector=as.integer(GOref$gene_id%in%midnightblue.vector)
names(midnightblue.vector)=GOref$gene_id

navajowhite1.vector <- as.vector(navajowhite1.sig$gene_id)
navajowhite1.vector=as.integer(GOref$gene_id%in%navajowhite1.vector)
names(navajowhite1.vector)=GOref$gene_id

plum3.vector <- as.vector(plum3.sig$gene_id)
plum3.vector=as.integer(GOref$gene_id%in%plum3.vector)
names(plum3.vector)=GOref$gene_id

plum4.vector <- as.vector(plum4.sig$gene_id)
plum4.vector=as.integer(GOref$gene_id%in%plum4.vector)
names(plum4.vector)=GOref$gene_id

salmon.vector <- as.vector(salmon.sig$gene_id)
salmon.vector=as.integer(GOref$gene_id%in%salmon.vector)
names(salmon.vector)=GOref$gene_id

salmon4.vector <- as.vector(salmon4.sig$gene_id)
salmon4.vector=as.integer(GOref$gene_id%in%salmon4.vector)
names(salmon4.vector)=GOref$gene_id

sienna3.vector <- as.vector(sienna3.sig$gene_id)
sienna3.vector=as.integer(GOref$gene_id%in%sienna3.vector)
names(sienna3.vector)=GOref$gene_id

skyblue1.vector <- as.vector(skyblue1.sig$gene_id)
skyblue1.vector=as.integer(GOref$gene_id%in%skyblue1.vector)
names(skyblue1.vector)=GOref$gene_id

thistle.vector <- as.vector(thistle.sig$gene_id)
thistle.vector=as.integer(GOref$gene_id%in%thistle.vector)
names(thistle.vector)=GOref$gene_id

thistle4.vector <- as.vector(thistle4.sig$gene_id)
thistle4.vector=as.integer(GOref$gene_id%in%thistle4.vector)
names(thistle4.vector)=GOref$gene_id

violet.vector <- as.vector(violet.sig$gene_id)
violet.vector=as.integer(GOref$gene_id%in%violet.vector)
names(violet.vector)=GOref$gene_id

vector.list <- list(antiquewhite2.vector, antiquewhite4.vector, blue.vector, blue2.vector, blue4.vector, blueviolet.vector, brown2.vector, coral.vector, coral1.vector, cyan.vector, darkmagenta.vector, darkseagreen4.vector, darkslateblue.vector, grey.vector, honeydew1.vector, indianred3.vector, ivory.vector, lavenderblush2.vector, lightslateblue.vector, lightsteelblue.vector, magenta4.vector, mediumpurple1.vector, mediumpurple3.vector, midnightblue.vector, navajowhite1.vector, plum3.vector, plum4.vector, salmon.vector, salmon4.vector, sienna3.vector, skyblue1.vector, thistle.vector, thistle4.vector, violet.vector)
length(vector.list)
## [1] 34

Calculate Probability Weighting Function

Perform goseq

GOwall <- lapply(pwf, goseq, gene2cat=GO.terms, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method="Wallenius", use_genes_without_cat=TRUE)

Find only enriched GO terms that are statistically significant at cutoff. Order by p-value and select top 10 most significant terms for each module.

GO.05<-lapply(GOwall, filter, over_represented_pvalue<.05)
GO.05 <- lapply(GO.05, function(GO.05){GO.05[order(GO.05$over_represented_pvalue, -GO.05$numDEInCat),]})
names(GO.05) = moduleColor_list #name the dataframes according to their color
GO.05.all <- bind_rows(GO.05, .id = "column_label") #merge the dataframes using the name as an identifier
GO.05.all <- rename(GO.05.all,"moduleColor"="column_label") #rename the identifying column
GO.05.all <- merge(moduleCluster, GO.05.all, by = "moduleColor") #Add cluster information
GO.05.all <- GO.05.all[order(factor(GO.05.all$moduleColor, levels = moduleCluster$moduleColor)),] #Match order to heatmap
GO.05.all <- subset(GO.05.all, select = c(2,1, 3:9)) #make cluster first, then module second

GO.05.top10 <- lapply(GO.05, "[",c(1:10),,drop=FALSE)

Merge list into 1 dataframe to obtain top 10

names(GO.05.top10) = moduleColor_list #name the dataframes according to their color
GO.05.top10.all <- bind_rows(GO.05.top10, .id = "column_label") #merge the dataframes using the name as an identifier
GO.05.top10.all <- rename(GO.05.top10.all, "moduleColor"="column_label") #rename the identifying column
GO.05.top10.all <- merge(moduleCluster, GO.05.top10.all, by = "moduleColor") #Add cluster information
GO.05.top10.all <- GO.05.top10.all[order(factor(GO.05.top10.all$moduleColor, levels = moduleCluster$moduleColor)),] #Match order to heatmap
GO.05.top10.all <- subset(GO.05.top10.all, select = c(2,1, 3:9)) #make cluster first, then module second
head(GO.05.top10.all)
##     moduleCluster moduleColor   category over_represented_pvalue
## 131             1        grey GO:0007224             0.002294852
## 132             1        grey GO:0005758             0.002739721
## 133             1        grey GO:0042162             0.003513099
## 134             1        grey GO:0043564             0.003513099
## 135             1        grey GO:0036128             0.003538448
## 136             1        grey GO:0006303             0.006480869
##     under_represented_pvalue numDEInCat numInCat
## 131                0.9999983          1        3
## 132                0.9999976          1        3
## 133                0.9999955          1        4
## 134                0.9999955          1        4
## 135                0.9999955          1        4
## 136                0.9999822          1        8
##                                                         term ontology
## 131                             smoothened signaling pathway       BP
## 132                        mitochondrial intermembrane space       CC
## 133                                    telomeric DNA binding       MF
## 134                                        Ku70:Ku80 complex       CC
## 135                                          CatSper complex       CC
## 136 double-strand break repair via nonhomologous end joining       BP
tail(GO.05.top10.all)
##     moduleCluster moduleColor   category over_represented_pvalue
## 285             9     salmon4 GO:0051603            1.375767e-11
## 286             9     salmon4 GO:0004298            1.253568e-08
## 287             9     salmon4 GO:0006614            1.855889e-05
## 288             9     salmon4 GO:0005634            2.293359e-05
## 289             9     salmon4 GO:0006511            1.208782e-04
## 290             9     salmon4 GO:0019843            2.757372e-04
##     under_represented_pvalue numDEInCat numInCat
## 285                1.0000000          6       10
## 286                1.0000000          5       14
## 287                0.9999998          3        9
## 288                0.9999960         11      379
## 289                0.9999913          5       70
## 290                0.9999984          2        6
##                                                            term ontology
## 285  proteolysis involved in cellular protein catabolic process       BP
## 286                       threonine-type endopeptidase activity       MF
## 287 SRP-dependent cotranslational protein targeting to membrane       BP
## 288                                                     nucleus       CC
## 289               ubiquitin-dependent protein catabolic process       BP
## 290                                                rRNA binding       MF
str(GO.05.top10.all)
## 'data.frame':    340 obs. of  9 variables:
##  $ moduleCluster           : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ moduleColor             : chr  "grey" "grey" "grey" "grey" ...
##  $ category                : chr  "GO:0007224" "GO:0005758" "GO:0042162" "GO:0043564" ...
##  $ over_represented_pvalue : num  0.00229 0.00274 0.00351 0.00351 0.00354 ...
##  $ under_represented_pvalue: num  1 1 1 1 1 ...
##  $ numDEInCat              : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ numInCat                : int  3 3 4 4 4 8 10 2 2 8 ...
##  $ term                    : chr  "smoothened signaling pathway" "mitochondrial intermembrane space" "telomeric DNA binding" "Ku70:Ku80 complex" ...
##  $ ontology                : chr  "BP" "CC" "MF" "CC" ...

Save as CSV

#write.csv(GO.05.top10.all, file = "Output/RNAseq/WGCNA.GO.05.top10.csv")
write.csv(GO.05.all, file = "2a-WGCNA/Output/GO.05.allMods.csv")